// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package mail implements parsing of mail messages.

For the most part, this package follows the syntax as specified by RFC 5322 and
extended by RFC 6532.
Notable divergences:
  - Obsolete address formats are not parsed, including addresses with
    embedded route information.
  - The full range of spacing (the CFWS syntax element) is not supported,
    such as breaking addresses across lines.
  - No unicode normalization is performed.
  - A leading From line is permitted, as in mbox format (RFC 4155).
*/
package mail

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"log"
	"mime"
	"net"
	"net/textproto"
	"strings"
	"sync"
	"time"
	"unicode/utf8"
)
// debug controls whether the parser emits trace output through the log
// package. It is disabled by default.
var debug = debugT(false)

// debugT is a boolean switch with a Printf method, so trace calls can be
// left in the code and cost only a branch when tracing is off.
type debugT bool

// Printf forwards to log.Printf when d is true and does nothing otherwise.
func (d debugT) Printf(format string, args ...any) {
	if d {
		log.Printf(format, args...)
	}
}

// A Message represents a parsed mail message.
type Message struct {
	Header Header
	Body   io.Reader
}

// ReadMessage reads a message from r.
// The headers are parsed, and the body of the message will be available
// for reading from msg.Body.
func ReadMessage(r io.Reader) (msg *Message, err error) {
	tp := textproto.NewReader(bufio.NewReader(r))

	hdr, err := readHeader(tp)
	// An io.EOF after at least one header was read is tolerated: the
	// message simply has no body.
	if err != nil && (err != io.EOF || len(hdr) == 0) {
		return nil, err
	}

	return &Message{
		Header: Header(hdr),
		Body:   tp.R,
	}, nil
}

// readHeader reads the message headers from r.
// This is like textproto.ReadMIMEHeader, but doesn't validate.
// The fix for issue #53188 tightened up net/textproto to enforce
// restrictions of RFC 7230.
// This package implements RFC 5322, which does not have those restrictions.
// This function copies the relevant code from net/textproto,
// simplified for RFC 5322.
func readHeader(r *textproto.Reader) (map[string][]string, error) {
	m := make(map[string][]string)

	// The first line cannot start with a leading space.
	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
		line, err := r.ReadLine()
		if err != nil {
			return m, err
		}
		return m, errors.New("malformed initial line: " + line)
	}

	for {
		kv, err := r.ReadContinuedLine()
		if kv == "" {
			// Blank line (end of headers) or a read error with no data.
			return m, err
		}

		// Key ends at first colon.
		k, v, ok := strings.Cut(kv, ":")
		if !ok {
			return m, errors.New("malformed header line: " + kv)
		}
		key := textproto.CanonicalMIMEHeaderKey(k)

		// Permit empty key, because that is what we did in the past.
		if key == "" {
			continue
		}

		// Skip initial spaces in value.
		value := strings.TrimLeft(v, " \t")

		m[key] = append(m[key], value)

		// The line was stored before the error is reported, so a final
		// header line terminated by EOF is not lost.
		if err != nil {
			return m, err
		}
	}
}

// Layouts suitable for passing to time.Parse.
// These are tried in order.
var (
	dateLayoutsBuildOnce sync.Once
	dateLayouts          []string
)

// buildDateLayouts fills dateLayouts with every combination of the optional
// date components. It is meant to be run once via dateLayoutsBuildOnce.
func buildDateLayouts() {
	// Generate layouts based on RFC 5322, section 3.3.

	dows := [...]string{"", "Mon, "}   // day-of-week
	days := [...]string{"2", "02"}     // day = 1*2DIGIT
	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}  // second
	// "-0700 (MST)" is not in RFC 5322, but is common.
	zones := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...

	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, s)
					}
				}
			}
		}
	}
}

// ParseDate parses an RFC 5322 date string.
func ParseDate(date string) (time.Time, error) {
	dateLayoutsBuildOnce.Do(buildDateLayouts)
	// CR and LF must match and are tolerated anywhere in the date field.
	date = strings.ReplaceAll(date, "\r\n", "")
	if strings.Contains(date, "\r") {
		return time.Time{}, errors.New("mail: header has a CR without LF")
	}
	// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
	p := addrParser{date, nil}
	p.skipSpace()

	// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
	// zone length is always 5 chars unless obsolete (obs-zone)
	if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
		date = p.s[:ind+5]
		p.s = p.s[ind+5:]
	} else {
		ind := strings.Index(p.s, "T")
		if ind == 0 {
			// In this case we have the following date formats:
			// * Thu, 20 Nov 1997 09:55:06 MDT
			// * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
			// * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
			ind = strings.Index(p.s[1:], "T")
			if ind != -1 {
				ind++
			}
		}

		if ind != -1 && len(p.s) >= ind+5 {
			// The last letter T of the obsolete time zone is checked when no standard time zone is found.
			// If T is misplaced, the date to parse is garbage.
			date = p.s[:ind+1]
			p.s = p.s[ind+1:]
		}
	}
	// Whatever follows the zone may only be comments and white space.
	if !p.skipCFWS() {
		return time.Time{}, errors.New("mail: misformatted parenthetical comment")
	}
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}

// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get gets the first value associated with the given key.
// It is case insensitive; CanonicalMIMEHeaderKey is used
// to canonicalize the provided key.
// If there are no values associated with the key, Get returns "".
// To access multiple values of a key, or to use non-canonical keys,
// access the map directly.
func (h Header) Get(key string) string {
	return textproto.MIMEHeader(h).Get(key)
}

// ErrHeaderNotPresent is returned by the Header accessors when the
// requested field is absent (or its first value is empty).
var ErrHeaderNotPresent = errors.New("mail: header not in message")

// Date parses the Date header field.
func (h Header) Date() (time.Time, error) {
	hdr := h.Get("Date")
	if hdr == "" {
		return time.Time{}, ErrHeaderNotPresent
	}
	return ParseDate(hdr)
}

// AddressList parses the named header field as a list of addresses.
func (h Header) AddressList(key string) ([]*Address, error) {
	hdr := h.Get(key)
	if hdr == "" {
		return nil, ErrHeaderNotPresent
	}
	return ParseAddressList(hdr)
}

// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}

// ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
func ParseAddress(address string) (*Address, error) {
	return (&addrParser{s: address}).parseSingleAddress()
}

// ParseAddressList parses the given string as a list of addresses.
func ParseAddressList(list string) ([]*Address, error) {
	return (&addrParser{s: list}).parseAddressList()
}

// An AddressParser is an RFC 5322 address parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	WordDecoder *mime.WordDecoder
}

// Parse parses a single RFC 5322 address of the
// form "Gogh Fir <gf@example.com>" or "foo@example.com".
func (p *AddressParser) Parse(address string) (*Address, error) {
	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
}

// ParseList parses the given string as a list of comma-separated addresses
// of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
func (p *AddressParser) ParseList(list string) ([]*Address, error) {
	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
}

// String formats the address as a valid RFC 5322 address.
// If the address's name contains non-ASCII characters
// the name will be rendered according to RFC 2047.
func (a *Address) String() string {
	// Format address local@domain
	at := strings.LastIndex(a.Address, "@")
	var local, domain string
	if at < 0 {
		// This is a malformed address ("@" is required in addr-spec);
		// treat the whole address as local-part.
		local = a.Address
	} else {
		local, domain = a.Address[:at], a.Address[at+1:]
	}

	// Add quotes if needed
	quoteLocal := false
	for i, r := range local {
		if isAtext(r, false) {
			continue
		}
		if r == '.' {
			// Dots are okay if they are surrounded by atext.
			// We only need to check that the previous byte is
			// not a dot, and this isn't the end of the string.
			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
				continue
			}
		}
		quoteLocal = true
		break
	}
	if quoteLocal {
		local = quoteString(local)
	}

	s := "<" + local + "@" + domain + ">"

	if a.Name == "" {
		return s
	}

	// If every character is printable ASCII, quoting is simple.
	allPrintable := true
	for _, r := range a.Name {
		// isWSP here should actually be isFWS,
		// but we don't support folding yet.
		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
			allPrintable = false
			break
		}
	}
	if allPrintable {
		return quoteString(a.Name) + " " + s
	}

	// Text in an encoded-word in a display-name must not contain certain
	// characters like quotes or parentheses (see RFC 2047 section 5.3).
	// When this is the case encode the name using base64 encoding.
	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
	}
	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
}

// addrParser is the parsing state: s holds the input that has not been
// consumed yet, and dec is the decoder used for RFC 2047 encoded-words.
type addrParser struct {
	s   string
	dec *mime.WordDecoder // may be nil
}

// parseAddressList parses the whole remaining input as a comma-separated
// list of addresses. Group members are flattened into the result.
func (p *addrParser) parseAddressList() ([]*Address, error) {
	var list []*Address
	for {
		p.skipSpace()

		// allow skipping empty entries (RFC5322 obs-addr-list)
		if p.consume(',') {
			continue
		}

		addrs, err := p.parseAddress(true)
		if err != nil {
			return nil, err
		}
		list = append(list, addrs...)

		if !p.skipCFWS() {
			return nil, errors.New("mail: misformatted parenthetical comment")
		}
		if p.empty() {
			break
		}
		if p.peek() != ',' {
			return nil, errors.New("mail: expected comma")
		}

		// Skip empty entries for obs-addr-list.
		for p.consume(',') {
			p.skipSpace()
		}
		if p.empty() {
			break
		}
	}
	return list, nil
}

// parseSingleAddress parses the whole remaining input as exactly one
// address. A group is accepted only if it has exactly one member.
func (p *addrParser) parseSingleAddress() (*Address, error) {
	addrs, err := p.parseAddress(true)
	if err != nil {
		return nil, err
	}
	if !p.skipCFWS() {
		return nil, errors.New("mail: misformatted parenthetical comment")
	}
	if !p.empty() {
		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
	}
	if len(addrs) == 0 {
		return nil, errors.New("mail: empty group")
	}
	if len(addrs) > 1 {
		return nil, errors.New("mail: group with multiple addresses")
	}
	return addrs[0], nil
}

// parseAddress parses a single RFC 5322 address at the start of p.
// When handleGroup is true a group ("name: a, b;") is accepted and its
// members are returned; otherwise only a mailbox is accepted.
func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
	debug.Printf("parseAddress: %q", p.s)
	p.skipSpace()
	if p.empty() {
		return nil, errors.New("mail: no address")
	}

	// address = mailbox / group
	// mailbox = name-addr / addr-spec
	// group = display-name ":" [group-list] ";" [CFWS]

	// addr-spec has a more restricted grammar than name-addr,
	// so try parsing it first, and fallback to name-addr.
	// TODO(dsymonds): Is this really correct?
	spec, err := p.consumeAddrSpec()
	if err == nil {
		var displayName string
		p.skipSpace()
		if !p.empty() && p.peek() == '(' {
			// "addr-spec (Display Name)" form.
			displayName, err = p.consumeDisplayNameComment()
			if err != nil {
				return nil, err
			}
		}

		return []*Address{{
			Name:    displayName,
			Address: spec,
		}}, err
	}
	debug.Printf("parseAddress: not an addr-spec: %v", err)
	debug.Printf("parseAddress: state is now %q", p.s)

	// display-name
	var displayName string
	if p.peek() != '<' {
		displayName, err = p.consumePhrase()
		if err != nil {
			return nil, err
		}
	}
	debug.Printf("parseAddress: displayName=%q", displayName)

	p.skipSpace()
	if handleGroup {
		if p.consume(':') {
			return p.consumeGroupList()
		}
	}
	// angle-addr = "<" addr-spec ">"
	if !p.consume('<') {
		atext := true
		for _, r := range displayName {
			if !isAtext(r, true) {
				atext = false
				break
			}
		}
		if atext {
			// The input is like "foo.bar"; it's possible the input
			// meant to be "foo.bar@domain", or "foo.bar <...>".
			return nil, errors.New("mail: missing '@' or angle-addr")
		}
		// The input is like "Full Name", which couldn't possibly be a
		// valid email address if followed by "@domain"; the input
		// likely meant to be "Full Name <...>".
		return nil, errors.New("mail: no angle-addr")
	}
	spec, err = p.consumeAddrSpec()
	if err != nil {
		return nil, err
	}
	if !p.consume('>') {
		return nil, errors.New("mail: unclosed angle-addr")
	}
	debug.Printf("parseAddress: spec=%q", spec)

	return []*Address{{
		Name:    displayName,
		Address: spec,
	}}, nil
}

// consumeGroupList parses the members of a group, up to and including the
// terminating ';'. The group's display-name and ':' are already consumed.
func (p *addrParser) consumeGroupList() ([]*Address, error) {
	var group []*Address
	// handle empty group.
	p.skipSpace()
	if p.consume(';') {
		if !p.skipCFWS() {
			return nil, errors.New("mail: misformatted parenthetical comment")
		}
		return group, nil
	}

	for {
		p.skipSpace()
		// embedded groups not allowed.
		addrs, err := p.parseAddress(false)
		if err != nil {
			return nil, err
		}
		group = append(group, addrs...)

		if !p.skipCFWS() {
			return nil, errors.New("mail: misformatted parenthetical comment")
		}
		if p.consume(';') {
			if !p.skipCFWS() {
				return nil, errors.New("mail: misformatted parenthetical comment")
			}
			break
		}
		if !p.consume(',') {
			return nil, errors.New("mail: expected comma")
		}
	}
	return group, nil
}

// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
// On failure the parser state is restored, so the caller can retry the
// same input with a different production.
func (p *addrParser) consumeAddrSpec() (spec string, err error) {
	debug.Printf("consumeAddrSpec: %q", p.s)

	orig := *p
	defer func() {
		if err != nil {
			*p = orig
		}
	}()

	// local-part = dot-atom / quoted-string
	var localPart string
	p.skipSpace()
	if p.empty() {
		return "", errors.New("mail: no addr-spec")
	}
	if p.peek() == '"' {
		// quoted-string
		debug.Printf("consumeAddrSpec: parsing quoted-string")
		localPart, err = p.consumeQuotedString()
		if localPart == "" {
			err = errors.New("mail: empty quoted string in addr-spec")
		}
	} else {
		// dot-atom
		debug.Printf("consumeAddrSpec: parsing dot-atom")
		localPart, err = p.consumeAtom(true, false)
	}
	if err != nil {
		debug.Printf("consumeAddrSpec: failed: %v", err)
		return "", err
	}

	if !p.consume('@') {
		return "", errors.New("mail: missing @ in addr-spec")
	}

	// domain = dot-atom / domain-literal
	var domain string
	p.skipSpace()
	if p.empty() {
		return "", errors.New("mail: no domain in addr-spec")
	}

	if p.peek() == '[' {
		// domain-literal
		domain, err = p.consumeDomainLiteral()
		if err != nil {
			return "", err
		}
	} else {
		// dot-atom
		domain, err = p.consumeAtom(true, false)
		if err != nil {
			return "", err
		}
	}

	return localPart + "@" + domain, nil
}

// consumePhrase parses the RFC 5322 phrase at the start of p.
func (p *addrParser) consumePhrase() (phrase string, err error) {
	debug.Printf("consumePhrase: [%s]", p.s)
	// phrase = 1*word
	var words []string
	var isPrevEncoded bool
	for {
		// obs-phrase allows CFWS after one word
		if len(words) > 0 {
			if !p.skipCFWS() {
				return "", errors.New("mail: misformatted parenthetical comment")
			}
		}
		// word = atom / quoted-string
		var word string
		p.skipSpace()
		if p.empty() {
			break
		}
		isEncoded := false
		if p.peek() == '"' {
			// quoted-string
			word, err = p.consumeQuotedString()
		} else {
			// atom
			// We actually parse dot-atom here to be more permissive
			// than what RFC 5322 specifies.
			word, err = p.consumeAtom(true, true)
			if err == nil {
				word, isEncoded, err = p.decodeRFC2047Word(word)
			}
		}

		if err != nil {
			break
		}
		debug.Printf("consumePhrase: consumed %q", word)
		// Adjacent encoded-words are joined without a separating space.
		if isPrevEncoded && isEncoded {
			words[len(words)-1] += word
		} else {
			words = append(words, word)
		}
		isPrevEncoded = isEncoded
	}
	// Ignore any error if we got at least one word.
	if err != nil && len(words) == 0 {
		debug.Printf("consumePhrase: hit err: %v", err)
		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
	}
	phrase = strings.Join(words, " ")
	return phrase, nil
}

// consumeQuotedString parses the quoted string at the start of p.
func (p *addrParser) consumeQuotedString() (qs string, err error) {
	// Assume first byte is '"'.
	i := 1
	qsb := make([]rune, 0, 10)

	escaped := false

Loop:
	for {
		r, size := utf8.DecodeRuneInString(p.s[i:])

		switch {
		case size == 0:
			return "", errors.New("mail: unclosed quoted-string")

		case size == 1 && r == utf8.RuneError:
			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)

		case escaped:
			//  quoted-pair = ("\" (VCHAR / WSP))

			if !isVchar(r) && !isWSP(r) {
				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
			}

			qsb = append(qsb, r)
			escaped = false

		case isQtext(r) || isWSP(r):
			// qtext (printable US-ASCII excluding " and \), or
			// FWS (almost; we're ignoring CRLF)
			qsb = append(qsb, r)

		case r == '"':
			break Loop

		case r == '\\':
			escaped = true

		default:
			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)

		}

		i += size
	}
	// Drop everything up to and including the closing quote.
	p.s = p.s[i+1:]
	return string(qsb), nil
}

// consumeAtom parses an RFC 5322 atom at the start of p.
// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
// If permissive is true, consumeAtom will not fail on:
//   - leading/trailing/double dots in the atom (see golang.org/issue/4938)
func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
	i := 0

Loop:
	for {
		r, size := utf8.DecodeRuneInString(p.s[i:])
		switch {
		case size == 1 && r == utf8.RuneError:
			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)

		case size == 0 || !isAtext(r, dot):
			break Loop

		default:
			i += size

		}
	}

	if i == 0 {
		return "", errors.New("mail: invalid string")
	}
	atom, p.s = p.s[:i], p.s[i:]
	if !permissive {
		if strings.HasPrefix(atom, ".") {
			return "", errors.New("mail: leading dot in atom")
		}
		if strings.Contains(atom, "..") {
			return "", errors.New("mail: double dot in atom")
		}
		if strings.HasSuffix(atom, ".") {
			return "", errors.New("mail: trailing dot in atom")
		}
	}
	return atom, nil
}

// consumeDomainLiteral parses an RFC 5322 domain-literal at the start of p.
// The text between the brackets must be accepted by net.ParseIP.
func (p *addrParser) consumeDomainLiteral() (string, error) {
	// Skip the leading [
	if !p.consume('[') {
		return "", errors.New(`mail: missing "[" in domain-literal`)
	}

	// Parse the dtext
	var dtext string
	for {
		if p.empty() {
			return "", errors.New("mail: unclosed domain-literal")
		}
		if p.peek() == ']' {
			break
		}

		r, size := utf8.DecodeRuneInString(p.s)
		if size == 1 && r == utf8.RuneError {
			return "", fmt.Errorf("mail: invalid utf-8 in domain-literal: %q", p.s)
		}
		if !isDtext(r) {
			return "", fmt.Errorf("mail: bad character in domain-literal: %q", r)
		}

		dtext += p.s[:size]
		p.s = p.s[size:]
	}

	// Skip the trailing ]
	if !p.consume(']') {
		return "", errors.New("mail: unclosed domain-literal")
	}

	// Check if the domain literal is an IP address
	if net.ParseIP(dtext) == nil {
		return "", fmt.Errorf("mail: invalid IP address in domain-literal: %q", dtext)
	}

	return "[" + dtext + "]", nil
}

// consumeDisplayNameComment parses a parenthesized comment at the start of
// p and returns its text for use as a display name. The comment is split
// on spaces and tabs, each word is passed through decodeRFC2047Word, and
// the words are rejoined with single spaces.
func (p *addrParser) consumeDisplayNameComment() (string, error) {
	if !p.consume('(') {
		return "", errors.New("mail: comment does not start with (")
	}
	comment, ok := p.consumeComment()
	if !ok {
		return "", errors.New("mail: misformatted parenthetical comment")
	}

	// TODO(stapelberg): parse quoted-string within comment
	words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
	for idx, word := range words {
		decoded, isEncoded, err := p.decodeRFC2047Word(word)
		if err != nil {
			return "", err
		}
		if isEncoded {
			words[idx] = decoded
		}
	}

	return strings.Join(words, " "), nil
}

// consume removes the byte c from the start of p and reports true, or
// leaves p untouched and reports false if the input does not start with c.
func (p *addrParser) consume(c byte) bool {
	if p.empty() || p.peek() != c {
		return false
	}
	p.s = p.s[1:]
	return true
}

// skipSpace skips the leading space and tab characters.
func (p *addrParser) skipSpace() {
	p.s = strings.TrimLeft(p.s, " \t")
}

// peek returns the first unconsumed byte. It panics on empty input, so
// callers check empty first.
func (p *addrParser) peek() byte {
	return p.s[0]
}

// empty reports whether all input has been consumed.
func (p *addrParser) empty() bool {
	return p.len() == 0
}

// len returns the number of unconsumed bytes.
func (p *addrParser) len() int {
	return len(p.s)
}

// skipCFWS skips CFWS as defined in RFC5322.
// It reports false if a comment is not properly closed.
func (p *addrParser) skipCFWS() bool {
	p.skipSpace()

	for {
		if !p.consume('(') {
			break
		}

		if _, ok := p.consumeComment(); !ok {
			return false
		}

		p.skipSpace()
	}

	return true
}

// consumeComment consumes a comment whose opening '(' has already been
// consumed, honoring nested parentheses and backslash escapes. It returns
// the comment text without the outer parentheses, and whether the comment
// was properly closed.
func (p *addrParser) consumeComment() (string, bool) {
	// '(' already consumed.
	depth := 1

	var comment string
	for {
		if p.empty() || depth == 0 {
			break
		}

		if p.peek() == '\\' && p.len() > 1 {
			// Drop the backslash; the escaped byte is copied below and
			// does not affect the nesting depth.
			p.s = p.s[1:]
		} else if p.peek() == '(' {
			depth++
		} else if p.peek() == ')' {
			depth--
		}
		if depth > 0 {
			comment += p.s[:1]
		}
		p.s = p.s[1:]
	}

	return comment, depth == 0
}

// decodeRFC2047Word decodes s as an RFC 2047 encoded-word.
// On success it returns the decoded text with isEncoded=true.
// If decoding failed because of the character set reader, it returns s
// unchanged with isEncoded=true and the error.
// Any other decoding failure is ignored: s is returned unchanged with
// isEncoded=false and a nil error.
func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
	dec := p.dec
	if dec == nil {
		dec = &rfc2047Decoder
	}

	// Substitute our own CharsetReader function so that we can tell
	// whether an error from the Decode method was due to the
	// CharsetReader (meaning the charset is invalid).
	// We used to look for the charsetError type in the error result,
	// but that behaves badly with CharsetReaders other than the
	// one in rfc2047Decoder.
	adec := *dec
	charsetReaderError := false
	adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
		if dec.CharsetReader == nil {
			charsetReaderError = true
			return nil, charsetError(charset)
		}
		r, err := dec.CharsetReader(charset, input)
		if err != nil {
			charsetReaderError = true
		}
		return r, err
	}
	word, err = adec.Decode(s)
	if err == nil {
		return word, true, nil
	}

	// If the error came from the character set reader
	// (meaning the character set itself is invalid
	// but the decoding worked fine until then),
	// return the original text and the error,
	// with isEncoded=true.
	if charsetReaderError {
		return s, true, err
	}

	// Ignore invalid RFC 2047 encoded-word errors.
	return s, false, nil
}

// rfc2047Decoder is the decoder used when an addrParser has none of its
// own. Its CharsetReader rejects every charset it is asked about.
var rfc2047Decoder = mime.WordDecoder{
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		return nil, charsetError(charset)
	},
}

// charsetError is the error reported for an unsupported character set;
// its value is the charset name.
type charsetError string

// Error implements the error interface.
func (e charsetError) Error() string {
	return fmt.Sprintf("charset not supported: %q", string(e))
}

// isAtext reports whether r is an RFC 5322 atext character.
// If dot is true, period is included.
func isAtext(r rune, dot bool) bool {
	switch r {
	case '.':
		return dot

	// RFC 5322 3.2.3. specials
	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
		return false
	}
	return isVchar(r)
}

// isQtext reports whether r is an RFC 5322 qtext character.
func isQtext(r rune) bool {
	// Printable US-ASCII, excluding backslash or quote.
	if r == '\\' || r == '"' {
		return false
	}
	return isVchar(r)
}

// quoteString renders a string as an RFC 5322 quoted-string.
// Characters that are neither VCHAR nor WSP are dropped.
func quoteString(s string) string {
	var b strings.Builder
	b.WriteByte('"')
	for _, r := range s {
		if isQtext(r) || isWSP(r) {
			b.WriteRune(r)
		} else if isVchar(r) {
			b.WriteByte('\\')
			b.WriteRune(r)
		}
	}
	b.WriteByte('"')
	return b.String()
}

// isVchar reports whether r is an RFC 5322 VCHAR character.
func isVchar(r rune) bool {
	// Visible (printing) characters.
	return '!' <= r && r <= '~' || isMultibyte(r)
}

// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532.
func isMultibyte(r rune) bool {
	return r >= utf8.RuneSelf
}

// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	return r == ' ' || r == '\t'
}

// isDtext reports whether r is an RFC 5322 dtext character.
func isDtext(r rune) bool {
	// Printable US-ASCII, excluding "[", "]", or "\".
	if r == '[' || r == ']' || r == '\\' {
		return false
	}
	return isVchar(r)
}