// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package csv reads and writes comma-separated values (CSV) files.
// There are many kinds of CSV files; this package supports the format
// described in RFC 4180, except that [Writer] uses LF
// instead of CRLF as newline character by default.
//
// A csv file contains zero or more records of one or more fields per record.
// Each record is separated by the newline character. The final record may
// optionally be followed by a newline character.
//
//	field1,field2,field3
//
// White space is considered part of a field.
//
// Carriage returns before newline characters are silently removed.
//
// Blank lines are ignored. A line with only whitespace characters (excluding
// the ending newline character) is not considered a blank line.
//
// Fields which start and stop with the quote character " are called
// quoted-fields. The beginning and ending quote are not part of the
// field.
//
// The source:
//
//	normal string,"quoted-field"
//
// results in the fields
//
//	{`normal string`, `quoted-field`}
//
// Within a quoted-field a quote character followed by a second quote
// character is considered a single quote.
//
//	"the ""word"" is true","a ""quoted-field"""
//
// results in
//
//	{`the "word" is true`, `a "quoted-field"`}
//
// Newlines and commas may be included in a quoted-field
//
//	"Multi-line
//	field","comma is ,"
//
// results in
//
//	{`Multi-line
//	field`, `comma is ,`}
package csvimport ()// A ParseError is returned for parsing errors.// Line and column numbers are 1-indexed.typeParseErrorstruct { StartLine int// Line where the record starts Line int// Line where the error occurred Column int// Column (1-based byte index) where the error occurred Err error// The actual error}func ( *ParseError) () string {if .Err == ErrFieldCount {returnfmt.Sprintf("record on line %d: %v", .Line, .Err) }if .StartLine != .Line {returnfmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", .StartLine, .Line, .Column, .Err) }returnfmt.Sprintf("parse error on line %d, column %d: %v", .Line, .Column, .Err)}func ( *ParseError) () error { return .Err }// These are the errors that can be returned in [ParseError.Err].var (ErrBareQuote = errors.New("bare \" in non-quoted-field")ErrQuote = errors.New("extraneous or missing \" in quoted-field")ErrFieldCount = errors.New("wrong number of fields")// Deprecated: ErrTrailingComma is no longer used.ErrTrailingComma = errors.New("extra delimiter at end of line"))var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")func validDelim( rune) bool {return != 0 && != '"' && != '\r' && != '\n' && utf8.ValidRune() && != utf8.RuneError}// A Reader reads records from a CSV-encoded file.//// As returned by [NewReader], a Reader expects input conforming to RFC 4180.// The exported fields can be changed to customize the details before the// first call to [Reader.Read] or [Reader.ReadAll].//// The Reader converts all \r\n sequences in its input to plain \n,// including in multiline field values, so that the returned data does// not depend on which line-ending convention an input file uses.typeReaderstruct {// Comma is the field delimiter. // It is set to comma (',') by NewReader. // Comma must be a valid rune and must not be \r, \n, // or the Unicode replacement character (0xFFFD). Comma rune// Comment, if not 0, is the comment character. 
Lines beginning with the // Comment character without preceding whitespace are ignored. // With leading whitespace the Comment character becomes part of the // field, even if TrimLeadingSpace is true. // Comment must be a valid rune and must not be \r, \n, // or the Unicode replacement character (0xFFFD). // It must also not be equal to Comma. Comment rune// FieldsPerRecord is the number of expected fields per record. // If FieldsPerRecord is positive, Read requires each record to // have the given number of fields. If FieldsPerRecord is 0, Read sets it to // the number of fields in the first record, so that future records must // have the same field count. If FieldsPerRecord is negative, no check is // made and records may have a variable number of fields. FieldsPerRecord int// If LazyQuotes is true, a quote may appear in an unquoted field and a // non-doubled quote may appear in a quoted field. LazyQuotes bool// If TrimLeadingSpace is true, leading white space in a field is ignored. // This is done even if the field delimiter, Comma, is white space. TrimLeadingSpace bool// ReuseRecord controls whether calls to Read may return a slice sharing // the backing array of the previous call's returned slice for performance. // By default, each call to Read returns newly allocated memory owned by the caller. ReuseRecord bool// Deprecated: TrailingComma is no longer used. TrailingComma bool r *bufio.Reader// numLine is the current line being read in the CSV file. numLine int// offset is the input stream byte offset of the current reader position. offset int64// rawBuffer is a line buffer only used by the readLine method. rawBuffer []byte// recordBuffer holds the unescaped fields, one after another. // The fields can be accessed by using the indexes in fieldIndexes. // E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de` // and fieldIndexes will contain the indexes [1, 2, 5, 6]. recordBuffer []byte// fieldIndexes is an index of fields inside recordBuffer. 
// The i'th field ends at offset fieldIndexes[i] in recordBuffer. fieldIndexes []int// fieldPositions is an index of field positions for the // last record returned by Read. fieldPositions []position// lastRecord is a record cache and only used when ReuseRecord == true. lastRecord []string}// NewReader returns a new Reader that reads from r.func ( io.Reader) *Reader {return &Reader{Comma: ',',r: bufio.NewReader(), }}// Read reads one record (a slice of fields) from r.// If the record has an unexpected number of fields,// Read returns the record along with the error [ErrFieldCount].// If the record contains a field that cannot be parsed,// Read returns a partial record along with the parse error.// The partial record contains all fields read before the error.// If there is no data left to be read, Read returns nil, [io.EOF].// If [Reader.ReuseRecord] is true, the returned slice may be shared// between multiple calls to Read.func ( *Reader) () ( []string, error) {if .ReuseRecord { , = .readRecord(.lastRecord) .lastRecord = } else { , = .readRecord(nil) }return , }// FieldPos returns the line and column corresponding to// the start of the field with the given index in the slice most recently// returned by [Reader.Read]. Numbering of lines and columns starts at 1;// columns are counted in bytes, not runes.//// If this is called with an out-of-bounds index, it panics.func ( *Reader) ( int) (, int) {if < 0 || >= len(.fieldPositions) {panic("out of range index passed to FieldPos") } := &.fieldPositions[]return .line, .col}// InputOffset returns the input stream byte offset of the current reader// position. 
The offset gives the location of the end of the most recently// read row and the beginning of the next row.func ( *Reader) () int64 {return .offset}// pos holds the position of a field in the current line.type position struct { line, col int}// ReadAll reads all the remaining records from r.// Each record is a slice of fields.// A successful call returns err == nil, not err == [io.EOF]. Because ReadAll is// defined to read until EOF, it does not treat end of file as an error to be// reported.func ( *Reader) () ( [][]string, error) {for { , := .readRecord(nil)if == io.EOF {return , nil }if != nil {returnnil, } = append(, ) }}// readLine reads the next line (with the trailing endline).// If EOF is hit without a trailing endline, it will be omitted.// If some bytes were read, then the error is never [io.EOF].// The result is only valid until the next call to readLine.func ( *Reader) () ([]byte, error) { , := .r.ReadSlice('\n')if == bufio.ErrBufferFull { .rawBuffer = append(.rawBuffer[:0], ...)for == bufio.ErrBufferFull { , = .r.ReadSlice('\n') .rawBuffer = append(.rawBuffer, ...) 
} = .rawBuffer } := len()if > 0 && == io.EOF { = nil// For backwards compatibility, drop trailing \r before EOF.if [-1] == '\r' { = [:-1] } } .numLine++ .offset += int64()// Normalize \r\n to \n on all input lines.if := len(); >= 2 && [-2] == '\r' && [-1] == '\n' { [-2] = '\n' = [:-1] }return , }// lengthNL reports the number of bytes for the trailing \n.func lengthNL( []byte) int {iflen() > 0 && [len()-1] == '\n' {return1 }return0}// nextRune returns the next rune in b or utf8.RuneError.func nextRune( []byte) rune { , := utf8.DecodeRune()return}func ( *Reader) ( []string) ([]string, error) {if .Comma == .Comment || !validDelim(.Comma) || (.Comment != 0 && !validDelim(.Comment)) {returnnil, errInvalidDelim }// Read line (automatically skipping past empty lines and any comments).var []bytevarerrorfor == nil { , = .readLine()if .Comment != 0 && nextRune() == .Comment { = nilcontinue// Skip comment lines }if == nil && len() == lengthNL() { = nilcontinue// Skip empty lines }break }if == io.EOF {returnnil, }// Parse each field in the record.varerrorconst = len(`"`) := utf8.RuneLen(.Comma) := .numLine// Starting line for record .recordBuffer = .recordBuffer[:0] .fieldIndexes = .fieldIndexes[:0] .fieldPositions = .fieldPositions[:0] := position{line: .numLine, col: 1}:for {if .TrimLeadingSpace { := bytes.IndexFunc(, func( rune) bool {return !unicode.IsSpace() })if < 0 { = len() .col -= lengthNL() } = [:] .col += }iflen() == 0 || [0] != '"' {// Non-quoted string field := bytes.IndexRune(, .Comma) := if >= 0 { = [:] } else { = [:len()-lengthNL()] }// Check to make sure a quote does not appear in field.if !.LazyQuotes {if := bytes.IndexByte(, '"'); >= 0 { := .col + = &ParseError{StartLine: , Line: .numLine, Column: , Err: ErrBareQuote}break } } .recordBuffer = append(.recordBuffer, ...) 
.fieldIndexes = append(.fieldIndexes, len(.recordBuffer)) .fieldPositions = append(.fieldPositions, )if >= 0 { = [+:] .col += + continue }break } else {// Quoted string field := = [:] .col += for { := bytes.IndexByte(, '"')if >= 0 {// Hit next quote. .recordBuffer = append(.recordBuffer, [:]...) = [+:] .col += + switch := nextRune(); {case == '"':// `""` sequence (append quote). .recordBuffer = append(.recordBuffer, '"') = [:] .col += case == .Comma:// `",` sequence (end of field). = [:] .col += .fieldIndexes = append(.fieldIndexes, len(.recordBuffer)) .fieldPositions = append(.fieldPositions, )continuecaselengthNL() == len():// `"\n` sequence (end of line). .fieldIndexes = append(.fieldIndexes, len(.recordBuffer)) .fieldPositions = append(.fieldPositions, )breakcase .LazyQuotes:// `"` sequence (bare quote). .recordBuffer = append(.recordBuffer, '"')default:// `"*` sequence (invalid non-escaped quote). = &ParseError{StartLine: , Line: .numLine, Column: .col - , Err: ErrQuote}break } } elseiflen() > 0 {// Hit end of line (copy all data so far). .recordBuffer = append(.recordBuffer, ...)if != nil {break } .col += len() , = .readLine()iflen() > 0 { .line++ .col = 1 }if == io.EOF { = nil } } else {// Abrupt end of file (EOF or error).if !.LazyQuotes && == nil { = &ParseError{StartLine: , Line: .line, Column: .col, Err: ErrQuote}break } .fieldIndexes = append(.fieldIndexes, len(.recordBuffer)) .fieldPositions = append(.fieldPositions, )break } } } }if == nil { = }// Create a single string and create slices out of it. // This pins the memory of the fields together, but allocates once. 
:= string(.recordBuffer) // Convert to string once to batch allocations = [:0]ifcap() < len(.fieldIndexes) { = make([]string, len(.fieldIndexes)) } = [:len(.fieldIndexes)]varintfor , := range .fieldIndexes { [] = [:] = }// Check or update the expected fields per record.if .FieldsPerRecord > 0 {iflen() != .FieldsPerRecord && == nil { = &ParseError{StartLine: ,Line: ,Column: 1,Err: ErrFieldCount, } } } elseif .FieldsPerRecord == 0 { .FieldsPerRecord = len() }return , }
The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.