diff options
author | David Symonds <dsymonds@golang.org> | 2011-06-09 10:18:36 +1000 |
---|---|---|
committer | David Symonds <dsymonds@golang.org> | 2011-06-09 10:18:36 +1000 |
commit | ffd01f2a479af55cb12566bc2ba7e38b4be8cf01 (patch) | |
tree | 8736a96a973c3655e74ced1e5028d542fa5035c3 | |
parent | 833529fd6f5b1cc469a080980275ace3d43ade49 (diff) | |
download | go-ffd01f2a479af55cb12566bc2ba7e38b4be8cf01.tar.gz go-ffd01f2a479af55cb12566bc2ba7e38b4be8cf01.zip |
mail: decode "Q"-encoded mail addresses.
Supports ISO-8859-1 and UTF-8.
R=rsc, bradfitz
CC=golang-dev
https://golang.org/cl/4568064
-rw-r--r-- | src/pkg/mail/message.go | 48 | ||||
-rw-r--r-- | src/pkg/mail/message_test.go | 21 |
2 files changed, 68 insertions, 1 deletions
diff --git a/src/pkg/mail/message.go b/src/pkg/mail/message.go index 377a8d3943..ca818ebde4 100644 --- a/src/pkg/mail/message.go +++ b/src/pkg/mail/message.go @@ -23,6 +23,7 @@ import ( "log" "net/textproto" "os" + "strconv" "strings" "time" ) @@ -340,7 +341,13 @@ func (p *addrParser) consumePhrase() (phrase string, err os.Error) { debug.Printf("consumePhrase: hit err: %v", err) return "", os.ErrorString("mail: missing word in phrase") } - return strings.Join(words, " "), nil + phrase = strings.Join(words, " ") + + // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s. + if strings.HasPrefix(phrase, "=?") && strings.HasSuffix(phrase, "?=") && strings.Count(phrase, "?") == 4 { + return decodeRFC2047Word(phrase) + } + return phrase, nil } // consumeQuotedString parses the quoted string at the start of p. @@ -414,6 +421,45 @@ func (p *addrParser) len() int { return len(*p) } +func decodeRFC2047Word(s string) (string, os.Error) { + fields := strings.Split(s, "?", -1) + if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" { + return "", os.ErrorString("mail: address not RFC 2047 encoded") + } + charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2]) + // TODO(dsymonds): Support "b" encoding too. + if enc != "q" { + return "", fmt.Errorf("mail: RFC 2047 encoding not supported: %q", enc) + } + if charset != "iso-8859-1" && charset != "utf-8" { + return "", fmt.Errorf("mail: charset not supported: %q", charset) + } + + in := fields[3] + b := new(bytes.Buffer) + for i := 0; i < len(in); i++ { + switch c := in[i]; { + case c == '=' && i+2 < len(in): + x, err := strconv.Btoi64(in[i+1:i+3], 16) + if err != nil { + return "", fmt.Errorf("mail: invalid RFC 2047 encoding: %q", in[i:i+3]) + } + i += 2 + switch charset { + case "iso-8859-1": + b.WriteRune(int(x)) + case "utf-8": + b.WriteByte(byte(x)) + } + case c == '_': + b.WriteByte(' ') + default: + b.WriteByte(c) + } + } + return b.String(), nil +} + var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789" + diff --git a/src/pkg/mail/message_test.go b/src/pkg/mail/message_test.go index 731a748ede..92e9ef8de7 100644 --- a/src/pkg/mail/message_test.go +++ b/src/pkg/mail/message_test.go @@ -186,6 +186,27 @@ func TestAddressParsing(t *testing.T) { }, // RFC 5322, Appendix A.1.3 // TODO(dsymonds): Group addresses. + + // RFC 2047 "Q"-encoded ISO-8859-1 address. + { + `=?iso-8859-1?q?J=F6rg_Doe?= <joerg@example.com>`, + []*Address{ + &Address{ + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded UTF-8 address. + { + `=?utf-8?q?J=C3=B6rg_Doe?= <joerg@example.com>`, + []*Address{ + &Address{ + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, } for _, test := range tests { addrs, err := newAddrParser(test.addrsStr).parseAddressList() |