// Copyright 2011 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.
/*Package mail implements parsing of mail messages.For the most part, this package follows the syntax as specified by RFC 5322 andextended by RFC 6532.Notable divergences: - Obsolete address formats are not parsed, including addresses with embedded route information. - The full range of spacing (the CFWS syntax element) is not supported, such as breaking addresses across lines. - No unicode normalization is performed. - A leading From line is permitted, as in mbox format (RFC 4155).*/
package mailimport ()var debug = debugT(false)type debugT boolfunc ( debugT) ( string, ...any) {if {log.Printf(, ...) }}// A Message represents a parsed mail message.typeMessagestruct { Header Header Body io.Reader}// ReadMessage reads a message from r.// The headers are parsed, and the body of the message will be available// for reading from msg.Body.func ( io.Reader) ( *Message, error) { := textproto.NewReader(bufio.NewReader()) , := readHeader()if != nil && ( != io.EOF || len() == 0) {returnnil, }return &Message{Header: Header(),Body: .R, }, nil}// readHeader reads the message headers from r.// This is like textproto.ReadMIMEHeader, but doesn't validate.// The fix for issue #53188 tightened up net/textproto to enforce// restrictions of RFC 7230.// This package implements RFC 5322, which does not have those restrictions.// This function copies the relevant code from net/textproto,// simplified for RFC 5322.func readHeader( *textproto.Reader) (map[string][]string, error) { := make(map[string][]string)// The first line cannot start with a leading space.if , := .R.Peek(1); == nil && ([0] == ' ' || [0] == '\t') { , := .ReadLine()if != nil {return , }return , errors.New("malformed initial line: " + ) }for { , := .ReadContinuedLine()if == "" {return , }// Key ends at first colon. , , := strings.Cut(, ":")if ! {return , errors.New("malformed header line: " + ) } := textproto.CanonicalMIMEHeaderKey()// Permit empty key, because that is what we did in the past.if == "" {continue }// Skip initial spaces in value. := strings.TrimLeft(, " \t") [] = append([], )if != nil {return , } }}// Layouts suitable for passing to time.Parse.// These are tried in order.var ( dateLayoutsBuildOnce sync.Once dateLayouts []string)func buildDateLayouts() {// Generate layouts based on RFC 5322, section 3.3. := [...]string{"", "Mon, "} // day-of-week := [...]string{"2", "02"} // day = 1*2DIGIT := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT := [...]string{":05", ""} // second// "-0700 (MST)" is not in RFC 5322, but is common. := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...for , := range {for , := range {for , := range {for , := range {for , := range { := + + " Jan " + + " 15:04" + + " " + dateLayouts = append(dateLayouts, ) } } } } }}// ParseDate parses an RFC 5322 date string.func ( string) (time.Time, error) {dateLayoutsBuildOnce.Do(buildDateLayouts)// CR and LF must match and are tolerated anywhere in the date field. = strings.ReplaceAll(, "\r\n", "")ifstrings.Contains(, "\r") {returntime.Time{}, errors.New("mail: header has a CR without LF") }// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII := addrParser{, nil} .skipSpace()// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone // zone length is always 5 chars unless obsolete (obs-zone)if := strings.IndexAny(.s, "+-"); != -1 && len(.s) >= +5 { = .s[:+5] .s = .s[+5:] } else { := strings.Index(.s, "T")if == 0 {// In this case we have the following date formats: // * Thu, 20 Nov 1997 09:55:06 MDT // * Thu, 20 Nov 1997 09:55:06 MDT (MDT) // * Thu, 20 Nov 1997 09:55:06 MDT (This comment) = strings.Index(.s[1:], "T")if != -1 { ++ } }if != -1 && len(.s) >= +5 {// The last letter T of the obsolete time zone is checked when no standard time zone is found. // If T is misplaced, the date to parse is garbage. = .s[:+1] .s = .s[+1:] } }if !.skipCFWS() {returntime.Time{}, errors.New("mail: misformatted parenthetical comment") }for , := rangedateLayouts { , := time.Parse(, )if == nil {return , nil } }returntime.Time{}, errors.New("mail: header could not be parsed")}// A Header represents the key-value pairs in a mail message header.typeHeadermap[string][]string// Get gets the first value associated with the given key.// It is case insensitive; CanonicalMIMEHeaderKey is used// to canonicalize the provided key.// If there are no values associated with the key, Get returns "".// To access multiple values of a key, or to use non-canonical keys,// access the map directly.func ( Header) ( string) string {returntextproto.MIMEHeader().Get()}varErrHeaderNotPresent = errors.New("mail: header not in message")// Date parses the Date header field.func ( Header) () (time.Time, error) { := .Get("Date")if == "" {returntime.Time{}, ErrHeaderNotPresent }returnParseDate()}// AddressList parses the named header field as a list of addresses.func ( Header) ( string) ([]*Address, error) { := .Get()if == "" {returnnil, ErrHeaderNotPresent }returnParseAddressList()}// Address represents a single mail address.// An address such as "Barry Gibbs <bg@example.com>" is represented// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.typeAddressstruct { Name string// Proper name; may be empty. Address string// user@domain}// ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"func ( string) (*Address, error) {return (&addrParser{s: }).parseSingleAddress()}// ParseAddressList parses the given string as a list of addresses.func ( string) ([]*Address, error) {return (&addrParser{s: }).parseAddressList()}// An AddressParser is an RFC 5322 address parser.typeAddressParserstruct {// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. WordDecoder *mime.WordDecoder}// Parse parses a single RFC 5322 address of the// form "Gogh Fir <gf@example.com>" or "foo@example.com".func ( *AddressParser) ( string) (*Address, error) {return (&addrParser{s: , dec: .WordDecoder}).parseSingleAddress()}// ParseList parses the given string as a list of comma-separated addresses// of the form "Gogh Fir <gf@example.com>" or "foo@example.com".func ( *AddressParser) ( string) ([]*Address, error) {return (&addrParser{s: , dec: .WordDecoder}).parseAddressList()}// String formats the address as a valid RFC 5322 address.// If the address's name contains non-ASCII characters// the name will be rendered according to RFC 2047.func ( *Address) () string {// Format address local@domain := strings.LastIndex(.Address, "@")var , stringif < 0 {// This is a malformed address ("@" is required in addr-spec); // treat the whole address as local-part. = .Address } else { , = .Address[:], .Address[+1:] }// Add quotes if needed := falsefor , := range {ifisAtext(, false) {continue }if == '.' {// Dots are okay if they are surrounded by atext. // We only need to check that the previous byte is // not a dot, and this isn't the end of the string.if > 0 && [-1] != '.' && < len()-1 {continue } } = truebreak }if { = quoteString() } := "<" + + "@" + + ">"if .Name == "" {return }// If every character is printable ASCII, quoting is simple. := truefor , := range .Name {// isWSP here should actually be isFWS, // but we don't support folding yet.if !isVchar() && !isWSP() || isMultibyte() { = falsebreak } }if {returnquoteString(.Name) + " " + }// Text in an encoded-word in a display-name must not contain certain // characters like quotes or parentheses (see RFC 2047 section 5.3). // When this is the case encode the name using base64 encoding.ifstrings.ContainsAny(.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {returnmime.BEncoding.Encode("utf-8", .Name) + " " + }returnmime.QEncoding.Encode("utf-8", .Name) + " " + }type addrParser struct { s string dec *mime.WordDecoder// may be nil}func ( *addrParser) () ([]*Address, error) {var []*Addressfor { .skipSpace()// allow skipping empty entries (RFC5322 obs-addr-list)if .consume(',') {continue } , := .parseAddress(true)if != nil {returnnil, } = append(, ...)if !.skipCFWS() {returnnil, errors.New("mail: misformatted parenthetical comment") }if .empty() {break }if .peek() != ',' {returnnil, errors.New("mail: expected comma") }// Skip empty entries for obs-addr-list.for .consume(',') { .skipSpace() }if .empty() {break } }return , nil}func ( *addrParser) () (*Address, error) { , := .parseAddress(true)if != nil {returnnil, }if !.skipCFWS() {returnnil, errors.New("mail: misformatted parenthetical comment") }if !.empty() {returnnil, fmt.Errorf("mail: expected single address, got %q", .s) }iflen() == 0 {returnnil, errors.New("mail: empty group") }iflen() > 1 {returnnil, errors.New("mail: group with multiple addresses") }return [0], nil}// parseAddress parses a single RFC 5322 address at the start of p.func ( *addrParser) ( bool) ([]*Address, error) {debug.Printf("parseAddress: %q", .s) .skipSpace()if .empty() {returnnil, errors.New("mail: no address") }// address = mailbox / group // mailbox = name-addr / addr-spec // group = display-name ":" [group-list] ";" [CFWS]// addr-spec has a more restricted grammar than name-addr, // so try parsing it first, and fallback to name-addr. // TODO(dsymonds): Is this really correct? , := .consumeAddrSpec()if == nil {varstring .skipSpace()if !.empty() && .peek() == '(' { , = .consumeDisplayNameComment()if != nil {returnnil, } }return []*Address{{Name: ,Address: , }}, }debug.Printf("parseAddress: not an addr-spec: %v", )debug.Printf("parseAddress: state is now %q", .s)// display-namevarstringif .peek() != '<' { , = .consumePhrase()if != nil {returnnil, } }debug.Printf("parseAddress: displayName=%q", ) .skipSpace()if {if .consume(':') {return .consumeGroupList() } }// angle-addr = "<" addr-spec ">"if !.consume('<') { := truefor , := range {if !isAtext(, true) { = falsebreak } }if {// The input is like "foo.bar"; it's possible the input // meant to be "foo.bar@domain", or "foo.bar <...>".returnnil, errors.New("mail: missing '@' or angle-addr") }// The input is like "Full Name", which couldn't possibly be a // valid email address if followed by "@domain"; the input // likely meant to be "Full Name <...>".returnnil, errors.New("mail: no angle-addr") } , = .consumeAddrSpec()if != nil {returnnil, }if !.consume('>') {returnnil, errors.New("mail: unclosed angle-addr") }debug.Printf("parseAddress: spec=%q", )return []*Address{{Name: ,Address: , }}, nil}func ( *addrParser) () ([]*Address, error) {var []*Address// handle empty group. .skipSpace()if .consume(';') {if !.skipCFWS() {returnnil, errors.New("mail: misformatted parenthetical comment") }return , nil }for { .skipSpace()// embedded groups not allowed. , := .parseAddress(false)if != nil {returnnil, } = append(, ...)if !.skipCFWS() {returnnil, errors.New("mail: misformatted parenthetical comment") }if .consume(';') {if !.skipCFWS() {returnnil, errors.New("mail: misformatted parenthetical comment") }break }if !.consume(',') {returnnil, errors.New("mail: expected comma") } }return , nil}// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.func ( *addrParser) () ( string, error) {debug.Printf("consumeAddrSpec: %q", .s) := *deferfunc() {if != nil { * = } }()// local-part = dot-atom / quoted-stringvarstring .skipSpace()if .empty() {return"", errors.New("mail: no addr-spec") }if .peek() == '"' {// quoted-stringdebug.Printf("consumeAddrSpec: parsing quoted-string") , = .consumeQuotedString()if == "" { = errors.New("mail: empty quoted string in addr-spec") } } else {// dot-atomdebug.Printf("consumeAddrSpec: parsing dot-atom") , = .consumeAtom(true, false) }if != nil {debug.Printf("consumeAddrSpec: failed: %v", )return"", }if !.consume('@') {return"", errors.New("mail: missing @ in addr-spec") }// domain = dot-atom / domain-literalvarstring .skipSpace()if .empty() {return"", errors.New("mail: no domain in addr-spec") }if .peek() == '[' {// domain-literal , = .consumeDomainLiteral()if != nil {return"", } } else {// dot-atom , = .consumeAtom(true, false)if != nil {return"", } }return + "@" + , nil}// consumePhrase parses the RFC 5322 phrase at the start of p.func ( *addrParser) () ( string, error) {debug.Printf("consumePhrase: [%s]", .s)// phrase = 1*wordvar []stringvarboolfor {// obs-phrase allows CFWS after one wordiflen() > 0 {if !.skipCFWS() {return"", errors.New("mail: misformatted parenthetical comment") } }// word = atom / quoted-stringvarstring .skipSpace()if .empty() {break } := falseif .peek() == '"' {// quoted-string , = .consumeQuotedString() } else {// atom // We actually parse dot-atom here to be more permissive // than what RFC 5322 specifies. , = .consumeAtom(true, true)if == nil { , , = .decodeRFC2047Word() } }if != nil {break }debug.Printf("consumePhrase: consumed %q", )if && { [len()-1] += } else { = append(, ) } = }// Ignore any error if we got at least one word.if != nil && len() == 0 {debug.Printf("consumePhrase: hit err: %v", )return"", fmt.Errorf("mail: missing word in phrase: %v", ) } = strings.Join(, " ")return , nil}// consumeQuotedString parses the quoted string at the start of p.func ( *addrParser) () ( string, error) {// Assume first byte is '"'. := 1 := make([]rune, 0, 10) := false:for { , := utf8.DecodeRuneInString(.s[:])switch {case == 0:return"", errors.New("mail: unclosed quoted-string")case == 1 && == utf8.RuneError:return"", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", .s)case :// quoted-pair = ("\" (VCHAR / WSP))if !isVchar() && !isWSP() {return"", fmt.Errorf("mail: bad character in quoted-string: %q", ) } = append(, ) = falsecaseisQtext() || isWSP():// qtext (printable US-ASCII excluding " and \), or // FWS (almost; we're ignoring CRLF) = append(, )case == '"':breakcase == '\\': = truedefault:return"", fmt.Errorf("mail: bad character in quoted-string: %q", ) } += } .s = .s[+1:]returnstring(), nil}// consumeAtom parses an RFC 5322 atom at the start of p.// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.// If permissive is true, consumeAtom will not fail on:// - leading/trailing/double dots in the atom (see golang.org/issue/4938)func ( *addrParser) ( bool, bool) ( string, error) { := 0:for { , := utf8.DecodeRuneInString(.s[:])switch {case == 1 && == utf8.RuneError:return"", fmt.Errorf("mail: invalid utf-8 in address: %q", .s)case == 0 || !isAtext(, ):breakdefault: += } }if == 0 {return"", errors.New("mail: invalid string") } , .s = .s[:], .s[:]if ! {ifstrings.HasPrefix(, ".") {return"", errors.New("mail: leading dot in atom") }ifstrings.Contains(, "..") {return"", errors.New("mail: double dot in atom") }ifstrings.HasSuffix(, ".") {return"", errors.New("mail: trailing dot in atom") } }return , nil}// consumeDomainLiteral parses an RFC 5322 domain-literal at the start of p.func ( *addrParser) () (string, error) {// Skip the leading [if !.consume('[') {return"", errors.New(`mail: missing "[" in domain-literal`) }// Parse the dtextvarstringfor {if .empty() {return"", errors.New("mail: unclosed domain-literal") }if .peek() == ']' {break } , := utf8.DecodeRuneInString(.s)if == 1 && == utf8.RuneError {return"", fmt.Errorf("mail: invalid utf-8 in domain-literal: %q", .s) }if !isDtext() {return"", fmt.Errorf("mail: bad character in domain-literal: %q", ) } += .s[:] .s = .s[:] }// Skip the trailing ]if !.consume(']') {return"", errors.New("mail: unclosed domain-literal") }// Check if the domain literal is an IP addressifnet.ParseIP() == nil {return"", fmt.Errorf("mail: invalid IP address in domain-literal: %q", ) }return"[" + + "]", nil}func ( *addrParser) () (string, error) {if !.consume('(') {return"", errors.New("mail: comment does not start with (") } , := .consumeComment()if ! {return"", errors.New("mail: misformatted parenthetical comment") }// TODO(stapelberg): parse quoted-string within comment := strings.FieldsFunc(, func( rune) bool { return == ' ' || == '\t' })for , := range { , , := .decodeRFC2047Word()if != nil {return"", }if { [] = } }returnstrings.Join(, " "), nil}func ( *addrParser) ( byte) bool {if .empty() || .peek() != {returnfalse } .s = .s[1:]returntrue}// skipSpace skips the leading space and tab characters.func ( *addrParser) () { .s = strings.TrimLeft(.s, " \t")}func ( *addrParser) () byte {return .s[0]}func ( *addrParser) () bool {return .len() == 0}func ( *addrParser) () int {returnlen(.s)}// skipCFWS skips CFWS as defined in RFC5322.func ( *addrParser) () bool { .skipSpace()for {if !.consume('(') {break }if , := .consumeComment(); ! {returnfalse } .skipSpace() }returntrue}func ( *addrParser) () (string, bool) {// '(' already consumed. := 1varstringfor {if .empty() || == 0 {break }if .peek() == '\\' && .len() > 1 { .s = .s[1:] } elseif .peek() == '(' { ++ } elseif .peek() == ')' { -- }if > 0 { += .s[:1] } .s = .s[1:] }return , == 0}func ( *addrParser) ( string) ( string, bool, error) { := .decif == nil { = &rfc2047Decoder }// Substitute our own CharsetReader function so that we can tell // whether an error from the Decode method was due to the // CharsetReader (meaning the charset is invalid). // We used to look for the charsetError type in the error result, // but that behaves badly with CharsetReaders other than the // one in rfc2047Decoder. := * := false .CharsetReader = func( string, io.Reader) (io.Reader, error) {if .CharsetReader == nil { = truereturnnil, charsetError() } , := .CharsetReader(, )if != nil { = true }return , } , = .Decode()if == nil {return , true, nil }// If the error came from the character set reader // (meaning the character set itself is invalid // but the decoding worked fine until then), // return the original text and the error, // with isEncoded=true.if {return , true, }// Ignore invalid RFC 2047 encoded-word errors.return , false, nil}var rfc2047Decoder = mime.WordDecoder{CharsetReader: func( string, io.Reader) (io.Reader, error) {returnnil, charsetError() },}type charsetError stringfunc ( charsetError) () string {returnfmt.Sprintf("charset not supported: %q", string())}// isAtext reports whether r is an RFC 5322 atext character.// If dot is true, period is included.func isAtext( rune, bool) bool {switch {case'.':return// RFC 5322 3.2.3. specialscase'(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specialsreturnfalse }returnisVchar()}// isQtext reports whether r is an RFC 5322 qtext character.func isQtext( rune) bool {// Printable US-ASCII, excluding backslash or quote.if == '\\' || == '"' {returnfalse }returnisVchar()}// quoteString renders a string as an RFC 5322 quoted-string.func quoteString( string) string {varstrings.Builder .WriteByte('"')for , := range {ifisQtext() || isWSP() { .WriteRune() } elseifisVchar() { .WriteByte('\\') .WriteRune() } } .WriteByte('"')return .String()}// isVchar reports whether r is an RFC 5322 VCHAR character.func isVchar( rune) bool {// Visible (printing) characters.return'!' <= && <= '~' || isMultibyte()}// isMultibyte reports whether r is a multi-byte UTF-8 character// as supported by RFC 6532.func isMultibyte( rune) bool {return >= utf8.RuneSelf}// isWSP reports whether r is a WSP (white space).// WSP is a space or horizontal tab (RFC 5234 Appendix B).func isWSP( rune) bool {return == ' ' || == '\t'}// isDtext reports whether r is an RFC 5322 dtext character.func isDtext( rune) bool {// Printable US-ASCII, excluding "[", "]", or "\".if == '[' || == ']' || == '\\' {returnfalse }returnisVchar()}
The pages are generated with Goldsv0.7.0-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.