// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package csv reads and writes comma-separated values (CSV) files.
// There are many kinds of CSV files; this package supports the format
// described in RFC 4180.
//
// A csv file contains zero or more records of one or more fields per record.
// Each record is separated by the newline character. The final record may
// optionally be followed by a newline character.
//
//	field1,field2,field3
//
// White space is considered part of a field.
//
// Carriage returns before newline characters are silently removed.
//
// Blank lines are ignored. A line with only whitespace characters (excluding
// the ending newline character) is not considered a blank line.
//
// Fields which start and stop with the quote character " are called
// quoted-fields. The beginning and ending quote are not part of the
// field.
//
// The source:
//
//	normal string,"quoted-field"
//
// results in the fields
//
//	{`normal string`, `quoted-field`}
//
// Within a quoted-field a quote character followed by a second quote
// character is considered a single quote.
//
//	"the ""word"" is true","a ""quoted-field"""
//
// results in
//
//	{`the "word" is true`, `a "quoted-field"`}
//
// Newlines and commas may be included in a quoted-field
//
//	"Multi-line
//	field","comma is ,"
//
// results in
//
//	{`Multi-line
//	field`, `comma is ,`}
package csv

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"unicode"
	"unicode/utf8"
)

// A ParseError is returned for parsing errors.
// Line and column numbers are 1-indexed.
type ParseError struct {
	StartLine int   // Line where the record starts
	Line      int   // Line where the error occurred
	Column    int   // Column (1-based byte index) where the error occurred
	Err       error // The actual error
}

// Error formats the parse error. Field-count errors report only the line;
// other errors report line and column, and additionally the record's
// starting line when the error occurred on a later line of the record.
func (e *ParseError) Error() string {
	if e.Err == ErrFieldCount {
		return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
	}
	if e.StartLine != e.Line {
		return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
	}
	return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
}

// Unwrap returns the underlying error stored in Err.
func (e *ParseError) Unwrap() error { return e.Err }

// These are the errors that can be returned in [ParseError.Err].
var (
	ErrBareQuote  = errors.New("bare \" in non-quoted-field")
	ErrQuote      = errors.New("extraneous or missing \" in quoted-field")
	ErrFieldCount = errors.New("wrong number of fields")

	// Deprecated: ErrTrailingComma is no longer used.
	ErrTrailingComma = errors.New("extra delimiter at end of line")
)

var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")

// validDelim reports whether r may be used as a field or comment delimiter:
// it must be a valid rune other than NUL, the quote character, \r, \n,
// and the Unicode replacement character.
func validDelim(r rune) bool {
	return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
}

// A Reader reads records from a CSV-encoded file.
//
// As returned by [NewReader], a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to [Reader.Read] or [Reader.ReadAll].
//
// The Reader converts all \r\n sequences in its input to plain \n,
// including in multiline field values, so that the returned data does
// not depend on which line-ending convention an input file uses.
type Reader struct {
	// Comma is the field delimiter.
	// It is set to comma (',') by NewReader.
	// Comma must be a valid rune and must not be \r, \n,
	// or the Unicode replacement character (0xFFFD).
	Comma rune

	// Comment, if not 0, is the comment character. Lines beginning with the
	// Comment character without preceding whitespace are ignored.
	// With leading whitespace the Comment character becomes part of the
	// field, even if TrimLeadingSpace is true.
	// Comment must be a valid rune and must not be \r, \n,
	// or the Unicode replacement character (0xFFFD).
	// It must also not be equal to Comma.
	Comment rune

	// FieldsPerRecord is the number of expected fields per record.
	// If FieldsPerRecord is positive, Read requires each record to
	// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
	// the number of fields in the first record, so that future records must
	// have the same field count. If FieldsPerRecord is negative, no check is
	// made and records may have a variable number of fields.
	FieldsPerRecord int

	// If LazyQuotes is true, a quote may appear in an unquoted field and a
	// non-doubled quote may appear in a quoted field.
	LazyQuotes bool

	// If TrimLeadingSpace is true, leading white space in a field is ignored.
	// This is done even if the field delimiter, Comma, is white space.
	TrimLeadingSpace bool

	// ReuseRecord controls whether calls to Read may return a slice sharing
	// the backing array of the previous call's returned slice for performance.
	// By default, each call to Read returns newly allocated memory owned by the caller.
	ReuseRecord bool

	// Deprecated: TrailingComma is no longer used.
	TrailingComma bool

	r *bufio.Reader

	// numLine is the current line being read in the CSV file.
	numLine int

	// offset is the input stream byte offset of the current reader position.
	offset int64

	// rawBuffer is a line buffer only used by the readLine method.
	rawBuffer []byte

	// recordBuffer holds the unescaped fields, one after another.
	// The fields can be accessed by using the indexes in fieldIndexes.
	// E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de`
	// and fieldIndexes will contain the indexes [1, 2, 5, 6].
	recordBuffer []byte

	// fieldIndexes is an index of fields inside recordBuffer.
	// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
	fieldIndexes []int

	// fieldPositions is an index of field positions for the
	// last record returned by Read.
	fieldPositions []position

	// lastRecord is a record cache and only used when ReuseRecord == true.
	lastRecord []string
}

// NewReader returns a new Reader that reads from r.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		Comma: ',',
		r:     bufio.NewReader(r),
	}
}

// Read reads one record (a slice of fields) from r.
// If the record has an unexpected number of fields,
// Read returns the record along with the error [ErrFieldCount].
// If the record contains a field that cannot be parsed,
// Read returns a partial record along with the parse error.
// The partial record contains all fields read before the error.
// If there is no data left to be read, Read returns nil, [io.EOF].
// If [Reader.ReuseRecord] is true, the returned slice may be shared
// between multiple calls to Read.
func (r *Reader) Read() (record []string, err error) {
	if r.ReuseRecord {
		record, err = r.readRecord(r.lastRecord)
		r.lastRecord = record
	} else {
		record, err = r.readRecord(nil)
	}
	return record, err
}

// FieldPos returns the line and column corresponding to
// the start of the field with the given index in the slice most recently
// returned by [Reader.Read]. Numbering of lines and columns starts at 1;
// columns are counted in bytes, not runes.
//
// If this is called with an out-of-bounds index, it panics.
func (r *Reader) FieldPos(field int) (line, column int) {
	if field < 0 || field >= len(r.fieldPositions) {
		panic("out of range index passed to FieldPos")
	}
	p := &r.fieldPositions[field]
	return p.line, p.col
}

// InputOffset returns the input stream byte offset of the current reader
// position. The offset gives the location of the end of the most recently
// read row and the beginning of the next row.
func (r *Reader) InputOffset() int64 {
	return r.offset
}

// position holds the position of a field in the current line.
type position struct {
	line, col int
}

// ReadAll reads all the remaining records from r.
// Each record is a slice of fields.
// A successful call returns err == nil, not err == [io.EOF]. Because ReadAll is
// defined to read until EOF, it does not treat end of file as an error to be
// reported.
func (r *Reader) ReadAll() (records [][]string, err error) {
	for {
		record, err := r.readRecord(nil)
		if err == io.EOF {
			return records, nil
		}
		if err != nil {
			return nil, err
		}
		records = append(records, record)
	}
}

// readLine reads the next line (with the trailing endline).
// If EOF is hit without a trailing endline, it will be omitted.
// If some bytes were read, then the error is never [io.EOF].
// The result is only valid until the next call to readLine.
func (r *Reader) readLine() ([]byte, error) {
	line, err := r.r.ReadSlice('\n')
	if err == bufio.ErrBufferFull {
		// The line does not fit in bufio's buffer: accumulate the pieces
		// in rawBuffer until a newline (or another error) is reached.
		r.rawBuffer = append(r.rawBuffer[:0], line...)
		for err == bufio.ErrBufferFull {
			line, err = r.r.ReadSlice('\n')
			r.rawBuffer = append(r.rawBuffer, line...)
		}
		line = r.rawBuffer
	}
	readSize := len(line)
	if readSize > 0 && err == io.EOF {
		err = nil
		// For backwards compatibility, drop trailing \r before EOF.
		if line[readSize-1] == '\r' {
			line = line[:readSize-1]
		}
	}
	r.numLine++
	r.offset += int64(readSize)
	// Normalize \r\n to \n on all input lines.
	if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
		line[n-2] = '\n'
		line = line[:n-1]
	}
	return line, err
}

// lengthNL reports the number of bytes for the trailing \n.
func lengthNL(b []byte) int {
	if len(b) > 0 && b[len(b)-1] == '\n' {
		return 1
	}
	return 0
}

// nextRune returns the next rune in b or utf8.RuneError.
func nextRune(b []byte) rune {
	r, _ := utf8.DecodeRune(b)
	return r
}

// readRecord parses the next record into dst, reusing dst's backing array
// when it has enough capacity, and returns the resulting slice.
// It returns errInvalidDelim if Comma or Comment is not usable, io.EOF when
// no input remains, and a *ParseError for malformed input or a field-count
// mismatch. On a parse error the fields parsed so far are still returned.
func (r *Reader) readRecord(dst []string) ([]string, error) {
	if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
		return nil, errInvalidDelim
	}

	// Read line (automatically skipping past empty lines and any comments).
	var line []byte
	var errRead error
	for errRead == nil {
		line, errRead = r.readLine()
		if r.Comment != 0 && nextRune(line) == r.Comment {
			line = nil
			continue // Skip comment lines
		}
		if errRead == nil && len(line) == lengthNL(line) {
			line = nil
			continue // Skip empty lines
		}
		break
	}
	if errRead == io.EOF {
		return nil, errRead
	}

	// Parse each field in the record.
	var err error
	const quoteLen = len(`"`)
	commaLen := utf8.RuneLen(r.Comma)
	recLine := r.numLine // Starting line for record
	r.recordBuffer = r.recordBuffer[:0]
	r.fieldIndexes = r.fieldIndexes[:0]
	r.fieldPositions = r.fieldPositions[:0]
	pos := position{line: r.numLine, col: 1}
parseField:
	for {
		if r.TrimLeadingSpace {
			i := bytes.IndexFunc(line, func(c rune) bool {
				return !unicode.IsSpace(c)
			})
			if i < 0 {
				// The rest of the line is all white space; do not count
				// the trailing newline as a column.
				i = len(line)
				pos.col -= lengthNL(line)
			}
			line = line[i:]
			pos.col += i
		}
		if len(line) == 0 || line[0] != '"' {
			// Non-quoted string field
			i := bytes.IndexRune(line, r.Comma)
			field := line
			if i >= 0 {
				field = field[:i]
			} else {
				field = field[:len(field)-lengthNL(field)]
			}
			// Check to make sure a quote does not appear in field.
			if !r.LazyQuotes {
				if j := bytes.IndexByte(field, '"'); j >= 0 {
					col := pos.col + j
					err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
					break parseField
				}
			}
			r.recordBuffer = append(r.recordBuffer, field...)
			r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
			r.fieldPositions = append(r.fieldPositions, pos)
			if i >= 0 {
				line = line[i+commaLen:]
				pos.col += i + commaLen
				continue parseField
			}
			break parseField
		} else {
			// Quoted string field
			fieldPos := pos
			line = line[quoteLen:]
			pos.col += quoteLen
			for {
				i := bytes.IndexByte(line, '"')
				if i >= 0 {
					// Hit next quote.
					r.recordBuffer = append(r.recordBuffer, line[:i]...)
					line = line[i+quoteLen:]
					pos.col += i + quoteLen
					switch rn := nextRune(line); {
					case rn == '"':
						// `""` sequence (append quote).
						r.recordBuffer = append(r.recordBuffer, '"')
						line = line[quoteLen:]
						pos.col += quoteLen
					case rn == r.Comma:
						// `",` sequence (end of field).
						line = line[commaLen:]
						pos.col += commaLen
						r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
						r.fieldPositions = append(r.fieldPositions, fieldPos)
						continue parseField
					case lengthNL(line) == len(line):
						// `"\n` sequence (end of line).
						r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
						r.fieldPositions = append(r.fieldPositions, fieldPos)
						break parseField
					case r.LazyQuotes:
						// `"` sequence (bare quote).
						r.recordBuffer = append(r.recordBuffer, '"')
					default:
						// `"*` sequence (invalid non-escaped quote).
						err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
						break parseField
					}
				} else if len(line) > 0 {
					// Hit end of line (copy all data so far).
					r.recordBuffer = append(r.recordBuffer, line...)
					if errRead != nil {
						break parseField
					}
					pos.col += len(line)
					line, errRead = r.readLine()
					if len(line) > 0 {
						pos.line++
						pos.col = 1
					}
					if errRead == io.EOF {
						errRead = nil
					}
				} else {
					// Abrupt end of file (EOF or error).
					if !r.LazyQuotes && errRead == nil {
						err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
						break parseField
					}
					r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
					r.fieldPositions = append(r.fieldPositions, fieldPos)
					break parseField
				}
			}
		}
	}
	if err == nil {
		err = errRead
	}

	// Create a single string and create slices out of it.
	// This pins the memory of the fields together, but allocates once.
	str := string(r.recordBuffer) // Convert to string once to batch allocations
	dst = dst[:0]
	if cap(dst) < len(r.fieldIndexes) {
		dst = make([]string, len(r.fieldIndexes))
	}
	dst = dst[:len(r.fieldIndexes)]
	var preIdx int
	for i, idx := range r.fieldIndexes {
		dst[i] = str[preIdx:idx]
		preIdx = idx
	}

	// Check or update the expected fields per record.
	if r.FieldsPerRecord > 0 {
		if len(dst) != r.FieldsPerRecord && err == nil {
			err = &ParseError{
				StartLine: recLine,
				Line:      recLine,
				Column:    1,
				Err:       ErrFieldCount,
			}
		}
	} else if r.FieldsPerRecord == 0 {
		r.FieldsPerRecord = len(dst)
	}
	return dst, err
}