// Copyright 2010 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package textprotoimport (_// for linkname)// TODO: This should be a distinguishable error (ErrMessageTooLarge)// to allow mime/multipart to detect it.var errMessageTooLarge = errors.New("message too large")// A Reader implements convenience methods for reading requests// or responses from a text protocol network connection.typeReaderstruct { R *bufio.Reader dot *dotReader buf []byte// a re-usable buffer for readContinuedLineSlice}// NewReader returns a new [Reader] reading from r.//// To avoid denial of service attacks, the provided [bufio.Reader]// should be reading from an [io.LimitReader] or similar Reader to bound// the size of responses.func ( *bufio.Reader) *Reader {return &Reader{R: }}// ReadLine reads a single line from r,// eliding the final \n or \r\n from the returned string.func ( *Reader) () (string, error) { , := .readLineSlice(-1)returnstring(), }// ReadLineBytes is like [Reader.ReadLine] but returns a []byte instead of a string.func ( *Reader) () ([]byte, error) { , := .readLineSlice(-1)if != nil { = bytes.Clone() }return , }// readLineSlice reads a single line from r,// up to lim bytes long (or unlimited if lim is less than 0),// eliding the final \r or \r\n from the returned string.func ( *Reader) ( int64) ([]byte, error) { .closeDot()var []bytefor { , , := .R.ReadLine()if != nil {returnnil, }if >= 0 && int64(len())+int64(len()) > {returnnil, errMessageTooLarge }// Avoid the copy if the first call produced a full line.if == nil && ! {return , nil } = append(, ...)if ! {break } }return , nil}// ReadContinuedLine reads a possibly continued line from r,// eliding the final trailing ASCII white space.// Lines after the first are considered continuations if they// begin with a space or tab character. In the returned data,// continuation lines are separated from the previous line// only by a single space: the newline and leading white space// are removed.//// For example, consider this input://// Line 1// continued...// Line 2//// The first call to ReadContinuedLine will return "Line 1 continued..."// and the second will return "Line 2".//// Empty lines are never continued.func ( *Reader) () (string, error) { , := .readContinuedLineSlice(-1, noValidation)returnstring(), }// trim returns s with leading and trailing spaces and tabs removed.// It does not assume Unicode or UTF-8.func trim( []byte) []byte { := 0for < len() && ([] == ' ' || [] == '\t') { ++ } := len()for > && ([-1] == ' ' || [-1] == '\t') { -- }return [:]}// ReadContinuedLineBytes is like [Reader.ReadContinuedLine] but// returns a []byte instead of a string.func ( *Reader) () ([]byte, error) { , := .readContinuedLineSlice(-1, noValidation)if != nil { = bytes.Clone() }return , }// readContinuedLineSlice reads continued lines from the reader buffer,// returning a byte slice with all lines. The validateFirstLine function// is run on the first read line, and if it returns an error then this// error is returned from readContinuedLineSlice.// It reads up to lim bytes of data (or unlimited if lim is less than 0).func ( *Reader) ( int64, func([]byte) error) ([]byte, error) {if == nil {returnnil, fmt.Errorf("missing validateFirstLine func") }// Read the first line. , := .readLineSlice()if != nil {returnnil, }iflen() == 0 { // blank line - no continuationreturn , nil }if := (); != nil {returnnil, }// Optimistically assume that we have started to buffer the next line // and it starts with an ASCII letter (the next header key), or a blank // line, so we can avoid copying that buffered data around in memory // and skipping over non-existent whitespace.if .R.Buffered() > 1 { , := .R.Peek(2)iflen() > 0 && (isASCIILetter([0]) || [0] == '\n') ||len() == 2 && [0] == '\r' && [1] == '\n' {returntrim(), nil } }// ReadByte or the next readLineSlice will flush the read buffer; // copy the slice into buf. .buf = append(.buf[:0], trim()...)if < 0 { = math.MaxInt64 } -= int64(len(.buf))// Read continuation lines.for .skipSpace() > 0 { .buf = append(.buf, ' ')ifint64(len(.buf)) >= {returnnil, errMessageTooLarge } , := .readLineSlice( - int64(len(.buf)))if != nil {break } .buf = append(.buf, trim()...) }return .buf, nil}// skipSpace skips R over all spaces and returns the number of bytes skipped.func ( *Reader) () int { := 0for { , := .R.ReadByte()if != nil {// Bufio will keep err until next read.break }if != ' ' && != '\t' { .R.UnreadByte()break } ++ }return}func ( *Reader) ( int) ( int, bool, string, error) { , := .ReadLine()if != nil {return }returnparseCodeLine(, )}func parseCodeLine( string, int) ( int, bool, string, error) {iflen() < 4 || [3] != ' ' && [3] != '-' { = ProtocolError("short response: " + )return } = [3] == '-' , = strconv.Atoi([0:3])if != nil || < 100 { = ProtocolError("invalid response code: " + )return } = [4:]if1 <= && < 10 && /100 != ||10 <= && < 100 && /10 != ||100 <= && < 1000 && != { = &Error{, } }return}// ReadCodeLine reads a response code line of the form//// code message//// where code is a three-digit status code and the message// extends to the rest of the line. An example of such a line is://// 220 plan9.bell-labs.com ESMTP//// If the prefix of the status does not match the digits in expectCode,// ReadCodeLine returns with err set to &Error{code, message}.// For example, if expectCode is 31, an error will be returned if// the status is not in the range [310,319].//// If the response is multi-line, ReadCodeLine returns an error.//// An expectCode <= 0 disables the check of the status code.func ( *Reader) ( int) ( int, string, error) { , , , := .readCodeLine()if == nil && { = ProtocolError("unexpected multi-line response: " + ) }return}// ReadResponse reads a multi-line response of the form://// code-message line 1// code-message line 2// ...// code message line n//// where code is a three-digit status code. The first line starts with the// code and a hyphen. The response is terminated by a line that starts// with the same code followed by a space. Each line in message is// separated by a newline (\n).//// See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for// details of another form of response accepted://// code-message line 1// message line 2// ...// code message line n//// If the prefix of the status does not match the digits in expectCode,// ReadResponse returns with err set to &Error{code, message}.// For example, if expectCode is 31, an error will be returned if// the status is not in the range [310,319].//// An expectCode <= 0 disables the check of the status code.func ( *Reader) ( int) ( int, string, error) { , , , := .readCodeLine() := for { , := .ReadLine()if != nil {return0, "", }varintvarstring , , , = parseCodeLine(, 0)if != nil || != { += "\n" + strings.TrimRight(, "\r\n") = truecontinue } += "\n" + }if != nil && && != "" {// replace one line error message with all lines (full message) = &Error{, } }return}// DotReader returns a new [Reader] that satisfies Reads using the// decoded text of a dot-encoded block read from r.// The returned Reader is only valid until the next call// to a method on r.//// Dot encoding is a common framing used for data blocks// in text protocols such as SMTP. The data consists of a sequence// of lines, each of which ends in "\r\n". The sequence itself// ends at a line containing just a dot: ".\r\n". Lines beginning// with a dot are escaped with an additional dot to avoid// looking like the end of the sequence.//// The decoded form returned by the Reader's Read method// rewrites the "\r\n" line endings into the simpler "\n",// removes leading dot escapes if present, and stops with error [io.EOF]// after consuming (and discarding) the end-of-sequence line.func ( *Reader) () io.Reader { .closeDot() .dot = &dotReader{r: }return .dot}type dotReader struct { r *Reader state int}// Read satisfies reads by decoding dot-encoded data read from d.r.func ( *dotReader) ( []byte) ( int, error) {// Run data through a simple state machine to // elide leading dots, rewrite trailing \r\n into \n, // and detect ending .\r\n line.const ( = iota// beginning of line; initial state; must be zero// read . at beginning of line// read .\r at beginning of line// read \r (possibly at end of line)// reading data in middle of line// reached .\r\n end marker line ) := .r.Rfor < len() && .state != {varbyte , = .ReadByte()if != nil {if == io.EOF { = io.ErrUnexpectedEOF }break }switch .state {case :if == '.' { .state = continue }if == '\r' { .state = continue } .state = case :if == '\r' { .state = continue }if == '\n' { .state = continue } .state = case :if == '\n' { .state = continue }// Not part of .\r\n. // Consume leading dot and emit saved \r. .UnreadByte() = '\r' .state = case :if == '\n' { .state = break }// Not part of \r\n. Emit saved \r .UnreadByte() = '\r' .state = case :if == '\r' { .state = continue }if == '\n' { .state = } } [] = ++ }if == nil && .state == { = io.EOF }if != nil && .r.dot == { .r.dot = nil }return}// closeDot drains the current DotReader if any,// making sure that it reads until the ending dot line.func ( *Reader) () {if .dot == nil {return } := make([]byte, 128)for .dot != nil {// When Read reaches EOF or an error, // it will set r.dot == nil. .dot.Read() }}// ReadDotBytes reads a dot-encoding and returns the decoded data.//// See the documentation for the [Reader.DotReader] method for details about dot-encoding.func ( *Reader) () ([]byte, error) {returnio.ReadAll(.DotReader())}// ReadDotLines reads a dot-encoding and returns a slice// containing the decoded lines, with the final \r\n or \n elided from each.//// See the documentation for the [Reader.DotReader] method for details about dot-encoding.func ( *Reader) () ([]string, error) {// We could use ReadDotBytes and then Split it, // but reading a line at a time avoids needing a // large contiguous block of memory and is simpler.var []stringvarerrorfor {varstring , = .ReadLine()if != nil {if == io.EOF { = io.ErrUnexpectedEOF }break }// Dot by itself marks end; otherwise cut one dot.iflen() > 0 && [0] == '.' {iflen() == 1 {break } = [1:] } = append(, ) }return , }var colon = []byte(":")// ReadMIMEHeader reads a MIME-style header from r.// The header is a sequence of possibly continued Key: Value lines// ending in a blank line.// The returned map m maps [CanonicalMIMEHeaderKey](key) to a// sequence of values in the same order encountered in the input.//// For example, consider this input://// My-Key: Value 1// Long-Key: Even// Longer Value// My-Key: Value 2//// Given that input, ReadMIMEHeader returns the map://// map[string][]string{// "My-Key": {"Value 1", "Value 2"},// "Long-Key": {"Even Longer Value"},// }func ( *Reader) () (MIMEHeader, error) {returnreadMIMEHeader(, math.MaxInt64, math.MaxInt64)}// readMIMEHeader is accessed from mime/multipart.//go:linkname readMIMEHeader// readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size.// It is called by the mime/multipart package.func readMIMEHeader( *Reader, , int64) (MIMEHeader, error) {// Avoid lots of small slice allocations later by allocating one // large one ahead of time which we'll cut up into smaller // slices. If this isn't big enough later, we allocate small ones.var []string := .upcomingHeaderKeys()if > 0 {if > 1000 { = 1000// set a cap to avoid overallocation } = make([]string, ) } := make(MIMEHeader, )// Account for 400 bytes of overhead for the MIMEHeader, plus 200 bytes per entry. // Benchmarking map creation as of go1.20, a one-entry MIMEHeader is 416 bytes and large // MIMEHeaders average about 200 bytes per entry. -= 400const = 200// The first line cannot start with a leading space.if , := .R.Peek(1); == nil && ([0] == ' ' || [0] == '\t') {const = 80// arbitrary limit on how much of the line we'll quote , := .readLineSlice()if != nil {return , }return , ProtocolError("malformed MIME header initial line: " + string()) }for { , := .readContinuedLineSlice(, mustHaveFieldNameColon)iflen() == 0 {return , }// Key ends at first colon. , , := bytes.Cut(, colon)if ! {return , ProtocolError("malformed MIME header line: " + string()) } , := canonicalMIMEHeaderKey()if ! {return , ProtocolError("malformed MIME header line: " + string()) }for , := range {if !validHeaderValueByte() {return , ProtocolError("malformed MIME header line: " + string()) } } --if < 0 {returnnil, errMessageTooLarge }// Skip initial spaces in value. := string(bytes.TrimLeft(, " \t")) := []if == nil { -= int64(len()) -= } -= int64(len())if < 0 {return , errMessageTooLarge }if == nil && len() > 0 {// More than likely this will be a single-element key. // Most headers aren't multi-valued. // Set the capacity on strs[0] to 1, so any future append // won't extend the slice into the other strings. , = [:1:1], [1:] [0] = [] = } else { [] = append(, ) }if != nil {return , } }}// noValidation is a no-op validation func for readContinuedLineSlice// that permits any lines.func noValidation( []byte) error { returnnil }// mustHaveFieldNameColon ensures that, per RFC 7230, the// field-name is on a single line, so the first line must// contain a colon.func mustHaveFieldNameColon( []byte) error {ifbytes.IndexByte(, ':') < 0 {returnProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", )) }returnnil}var nl = []byte("\n")// upcomingHeaderKeys returns an approximation of the number of keys// that will be in this header. If it gets confused, it returns 0.func ( *Reader) () ( int) {// Try to determine the 'hint' size. .R.Peek(1) // force a buffer load if empty := .R.Buffered()if == 0 {return } , := .R.Peek()forlen() > 0 && < 1000 {var []byte , , _ = bytes.Cut(, nl)iflen() == 0 || (len() == 1 && [0] == '\r') {// Blank line separating headers from the body.break }if [0] == ' ' || [0] == '\t' {// Folded continuation of the previous line.continue } ++ }return}// CanonicalMIMEHeaderKey returns the canonical format of the// MIME header key s. The canonicalization converts the first// letter and any letter following a hyphen to upper case;// the rest are converted to lowercase. For example, the// canonical key for "accept-encoding" is "Accept-Encoding".// MIME header keys are assumed to be ASCII only.// If s contains a space or invalid header field bytes, it is// returned without modifications.func ( string) string {// Quick check for canonical encoding. := truefor := 0; < len(); ++ { := []if !validHeaderFieldByte() {return }if && 'a' <= && <= 'z' { , _ = canonicalMIMEHeaderKey([]byte())return }if ! && 'A' <= && <= 'Z' { , _ = canonicalMIMEHeaderKey([]byte())return } = == '-' }return}const toLower = 'a' - 'A'// validHeaderFieldByte reports whether c is a valid byte in a header// field name. RFC 7230 says://// header-field = field-name ":" OWS field-value OWS// field-name = token// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA// token = 1*tcharfunc validHeaderFieldByte( byte) bool {// mask is a 128-bit bitmap with 1s for allowed bytes, // so that the byte c can be tested with a shift and an and. // If c >= 128, then 1<<c and 1<<(c-64) will both be zero, // and this function will return false.const = 0 | (1<<(10)-1)<<'0' | (1<<(26)-1)<<'a' | (1<<(26)-1)<<'A' |1<<'!' |1<<'#' |1<<'$' |1<<'%' |1<<'&' |1<<'\'' |1<<'*' |1<<'+' |1<<'-' |1<<'.' |1<<'^' |1<<'_' |1<<'`' |1<<'|' |1<<'~'return ((uint64(1)<<)&(&(1<<64-1)) | (uint64(1)<<(-64))&(>>64)) != 0}// validHeaderValueByte reports whether c is a valid byte in a header// field value. RFC 7230 says://// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]// field-vchar = VCHAR / obs-text// obs-text = %x80-FF//// RFC 5234 says://// HTAB = %x09// SP = %x20// VCHAR = %x21-7Efunc validHeaderValueByte( byte) bool {// mask is a 128-bit bitmap with 1s for allowed bytes, // so that the byte c can be tested with a shift and an and. // If c >= 128, then 1<<c and 1<<(c-64) will both be zero. // Since this is the obs-text range, we invert the mask to // create a bitmap with 1s for disallowed bytes.const = 0 | (1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E1<<0x20 | // SP: %x201<<0x09// HTAB: %x09return ((uint64(1)<<)&^(&(1<<64-1)) | (uint64(1)<<(-64))&^(>>64)) == 0}// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is// allowed to mutate the provided byte slice before returning the// string.//// For invalid inputs (if a contains spaces or non-token bytes), a// is unchanged and a string copy is returned.//// ok is true if the header key contains only valid characters and spaces.// ReadMIMEHeader accepts header keys containing spaces, but does not// canonicalize them.func canonicalMIMEHeaderKey( []byte) ( string, bool) {iflen() == 0 {return"", false }// See if a looks like a header key. If not, return it unchanged. := falsefor , := range {ifvalidHeaderFieldByte() {continue }// Don't canonicalize.if == ' ' {// We accept invalid headers with a space before the // colon, but must not canonicalize them. // See https://go.dev/issue/34540. = truecontinue }returnstring(), false }if {returnstring(), true } := truefor , := range {// Canonicalize: first letter upper case // and upper case after each dash. // (Host, User-Agent, If-Modified-Since). // MIME headers are ASCII only, so no Unicode issues.if && 'a' <= && <= 'z' { -= toLower } elseif ! && 'A' <= && <= 'Z' { += toLower } [] = = == '-'// for next time }commonHeaderOnce.Do(initCommonHeader)// The compiler recognizes m[string(byteSlice)] as a special // case, so a copy of a's bytes into a new string does not // happen in this map lookup:if := commonHeader[string()]; != "" {return , true }returnstring(), true}// commonHeader interns common header strings.var commonHeader map[string]stringvar commonHeaderOnce sync.Oncefunc initCommonHeader() {commonHeader = make(map[string]string)for , := range []string{"Accept","Accept-Charset","Accept-Encoding","Accept-Language","Accept-Ranges","Cache-Control","Cc","Connection","Content-Id","Content-Language","Content-Length","Content-Transfer-Encoding","Content-Type","Cookie","Date","Dkim-Signature","Etag","Expires","From","Host","If-Modified-Since","If-None-Match","In-Reply-To","Last-Modified","Location","Message-Id","Mime-Version","Pragma","Received","Return-Path","Server","Set-Cookie","Subject","To","User-Agent","Via","X-Forwarded-For","X-Imforwards","X-Powered-By", } {commonHeader[] = }}
The pages are generated with Goldsv0.7.3. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.