// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package tarimport ()// Reader provides sequential access to the contents of a tar archive.// Reader.Next advances to the next file in the archive (including the first),// and then Reader can be treated as an io.Reader to access the file's data.typeReaderstruct { r io.Reader pad int64// Amount of padding (ignored) after current file entry curr fileReader// Reader for current file entry blk block// Buffer to use as temporary local storage// err is a persistent error. // It is only the responsibility of every exported method of Reader to // ensure that this error is sticky. err error}type fileReader interface {io.ReaderfileState WriteTo(io.Writer) (int64, error)}// NewReader creates a new [Reader] reading from r.func ( io.Reader) *Reader {return &Reader{r: , curr: ®FileReader{, 0}}}// Next advances to the next entry in the tar archive.// The Header.Size determines how many bytes can be read for the next file.// Any remaining data in the current file is automatically discarded.// At the end of the archive, Next returns the error io.EOF.//// If Next encounters a non-local name (as defined by [filepath.IsLocal])// and the GODEBUG environment variable contains `tarinsecurepath=0`,// Next returns the header with an [ErrInsecurePath] error.// A future version of Go may introduce this behavior by default.// Programs that want to accept non-local names can ignore// the [ErrInsecurePath] error and use the returned header.func ( *Reader) () (*Header, error) {if .err != nil {returnnil, .err } , := .next() .err = if == nil && !filepath.IsLocal(.Name) {iftarinsecurepath.Value() == "0" {tarinsecurepath.IncNonDefault() = ErrInsecurePath } }return , }func ( *Reader) () (*Header, error) {varmap[string]stringvar , string// Externally, Next iterates through the tar archive as if it is a series of // files. 
Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not // normally be visible to the outside. As such, this loop iterates through // one or more "header files" until it finds a "normal file". := FormatUSTAR | FormatPAX | FormatGNUfor {// Discard the remainder of the file and any padding.if := discard(.r, .curr.physicalRemaining()); != nil {returnnil, }if , := tryReadFull(.r, .blk[:.pad]); != nil {returnnil, } .pad = 0 , , := .readHeader()if != nil {returnnil, }if := .handleRegularFile(); != nil {returnnil, } .mayOnlyBe(.Format)// Check for PAX/GNU special headers and files.switch .Typeflag {caseTypeXHeader, TypeXGlobalHeader: .mayOnlyBe(FormatPAX) , = parsePAX()if != nil {returnnil, }if .Typeflag == TypeXGlobalHeader {mergePAX(, )return &Header{Name: .Name,Typeflag: .Typeflag,Xattrs: .Xattrs,PAXRecords: .PAXRecords,Format: , }, nil }continue// This is a meta header affecting the next headercaseTypeGNULongName, TypeGNULongLink: .mayOnlyBe(FormatGNU) , := readSpecialFile()if != nil {returnnil, }varparserswitch .Typeflag {caseTypeGNULongName: = .parseString()caseTypeGNULongLink: = .parseString() }continue// This is a meta header affecting the next headerdefault:// The old GNU sparse format is handled here since it is technically // just a regular file with additional attributes.if := mergePAX(, ); != nil {returnnil, }if != "" { .Name = }if != "" { .Linkname = }if .Typeflag == TypeRegA {ifstrings.HasSuffix(.Name, "/") { .Typeflag = TypeDir// Legacy archives use trailing slash for directories } else { .Typeflag = TypeReg } }// The extended headers may have updated the size. 
// Thus, setup the regFileReader again after merging PAX headers.if := .handleRegularFile(); != nil {returnnil, }// Sparse formats rely on being able to read from the logical data // section; there must be a preceding call to handleRegularFile.if := .handleSparseFile(, ); != nil {returnnil, }// Set the final guess at the format.if .has(FormatUSTAR) && .has(FormatPAX) { .mayOnlyBe(FormatUSTAR) } .Format = return , nil// This is a file, so stop } }}// handleRegularFile sets up the current file reader and padding such that it// can only read the following logical data section. It will properly handle// special headers that contain no data section.func ( *Reader) ( *Header) error { := .SizeifisHeaderOnlyType(.Typeflag) { = 0 }if < 0 {returnErrHeader } .pad = blockPadding() .curr = ®FileReader{r: .r, nb: }returnnil}// handleSparseFile checks if the current file is a sparse format of any type// and sets the curr reader appropriately.func ( *Reader) ( *Header, *block) error {varsparseDatasvarerrorif .Typeflag == TypeGNUSparse { , = .readOldGNUSparseMap(, ) } else { , = .readGNUSparsePAXHeaders() }// If sp is non-nil, then this is a sparse file. 
// Note that it is possible for len(sp) == 0.if == nil && != nil {ifisHeaderOnlyType(.Typeflag) || !validateSparseEntries(, .Size) {returnErrHeader } := invertSparseEntries(, .Size) .curr = &sparseFileReader{.curr, , 0} }return}// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.// If they are found, then this function reads the sparse map and returns it.// This assumes that 0.0 headers have already been converted to 0.1 headers// by the PAX header parsing logic.func ( *Reader) ( *Header) (sparseDatas, error) {// Identify the version of GNU headers.varbool , := .PAXRecords[paxGNUSparseMajor], .PAXRecords[paxGNUSparseMinor]switch {case == "0" && ( == "0" || == "1"): = falsecase == "1" && == "0": = truecase != "" || != "":returnnil, nil// Unknown GNU sparse PAX versioncase .PAXRecords[paxGNUSparseMap] != "": = false// 0.0 and 0.1 did not have explicit version records, so guessdefault:returnnil, nil// Not a PAX format GNU sparse file. } .Format.mayOnlyBe(FormatPAX)// Update hdr from GNU sparse PAX headers.if := .PAXRecords[paxGNUSparseName]; != "" { .Name = } := .PAXRecords[paxGNUSparseSize]if == "" { = .PAXRecords[paxGNUSparseRealSize] }if != "" { , := strconv.ParseInt(, 10, 64)if != nil {returnnil, ErrHeader } .Size = }// Read the sparse map according to the appropriate format.if {returnreadGNUSparseMap1x0(.curr) }returnreadGNUSparseMap0x1(.PAXRecords)}// mergePAX merges paxHdrs into hdr for all relevant fields of Header.func mergePAX( *Header, map[string]string) ( error) {for , := range {if == "" {continue// Keep the original USTAR value }varint64switch {casepaxPath: .Name = casepaxLinkpath: .Linkname = casepaxUname: .Uname = casepaxGname: .Gname = casepaxUid: , = strconv.ParseInt(, 10, 64) .Uid = int() // Integer overflow possiblecasepaxGid: , = strconv.ParseInt(, 10, 64) .Gid = int() // Integer overflow possiblecasepaxAtime: .AccessTime, = parsePAXTime()casepaxMtime: .ModTime, = parsePAXTime()casepaxCtime: .ChangeTime, = 
parsePAXTime()casepaxSize: .Size, = strconv.ParseInt(, 10, 64)default:ifstrings.HasPrefix(, paxSchilyXattr) {if .Xattrs == nil { .Xattrs = make(map[string]string) } .Xattrs[[len(paxSchilyXattr):]] = } }if != nil {returnErrHeader } } .PAXRecords = returnnil}// parsePAX parses PAX headers.// If an extended header (type 'x') is invalid, ErrHeader is returned.func parsePAX( io.Reader) (map[string]string, error) { , := readSpecialFile()if != nil {returnnil, } := string()// For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into format 0.1 // headers since 0.0 headers were not PAX compliant.var []string := make(map[string]string)forlen() > 0 { , , , := parsePAXRecord()if != nil {returnnil, ErrHeader } = switch {casepaxGNUSparseOffset, paxGNUSparseNumBytes:// Validate sparse header order and value.if (len()%2 == 0 && != paxGNUSparseOffset) || (len()%2 == 1 && != paxGNUSparseNumBytes) ||strings.Contains(, ",") {returnnil, ErrHeader } = append(, )default: [] = } }iflen() > 0 { [paxGNUSparseMap] = strings.Join(, ",") }return , nil}// readHeader reads the next block header and assumes that the underlying reader// is already aligned to a block boundary. 
It returns the raw block of the// header in case further processing is required.//// The err will be set to io.EOF only when one of the following occurs:// - Exactly 0 bytes are read and EOF is hit.// - Exactly 1 block of zeros is read and EOF is hit.// - At least 2 blocks of zeros are read.func ( *Reader) () (*Header, *block, error) {// Two blocks of zero bytes marks the end of the archive.if , := io.ReadFull(.r, .blk[:]); != nil {returnnil, nil, // EOF is okay here; exactly 0 bytes read }ifbytes.Equal(.blk[:], zeroBlock[:]) {if , := io.ReadFull(.r, .blk[:]); != nil {returnnil, nil, // EOF is okay here; exactly 1 block of zeros read }ifbytes.Equal(.blk[:], zeroBlock[:]) {returnnil, nil, io.EOF// normal EOF; exactly 2 block of zeros read }returnnil, nil, ErrHeader// Zero block and then non-zero block }// Verify the header matches a known format. := .blk.getFormat()if == FormatUnknown {returnnil, nil, ErrHeader }varparser := new(Header)// Unpack the V7 header. := .blk.toV7() .Typeflag = .typeFlag()[0] .Name = .parseString(.name()) .Linkname = .parseString(.linkName()) .Size = .parseNumeric(.size()) .Mode = .parseNumeric(.mode()) .Uid = int(.parseNumeric(.uid())) .Gid = int(.parseNumeric(.gid())) .ModTime = time.Unix(.parseNumeric(.modTime()), 0)// Unpack format specific fields.if > formatV7 { := .blk.toUSTAR() .Uname = .parseString(.userName()) .Gname = .parseString(.groupName()) .Devmajor = .parseNumeric(.devMajor()) .Devminor = .parseNumeric(.devMinor())varstringswitch {case .has(FormatUSTAR | FormatPAX): .Format = := .blk.toUSTAR() = .parseString(.prefix())// For Format detection, check if block is properly formatted since // the parser is more liberal than what USTAR actually permits. := func( rune) bool { return >= 0x80 }ifbytes.IndexFunc(.blk[:], ) >= 0 { .Format = FormatUnknown// Non-ASCII characters in block. 
} := func( []byte) bool { returnint([len()-1]) == 0 }if !((.size()) && (.mode()) && (.uid()) && (.gid()) && (.modTime()) && (.devMajor()) && (.devMinor())) { .Format = FormatUnknown// Numeric fields must end in NUL }case .has(formatSTAR): := .blk.toSTAR() = .parseString(.prefix()) .AccessTime = time.Unix(.parseNumeric(.accessTime()), 0) .ChangeTime = time.Unix(.parseNumeric(.changeTime()), 0)case .has(FormatGNU): .Format = varparser := .blk.toGNU()if := .accessTime(); [0] != 0 { .AccessTime = time.Unix(.parseNumeric(), 0) }if := .changeTime(); [0] != 0 { .ChangeTime = time.Unix(.parseNumeric(), 0) }// Prior to Go1.8, the Writer had a bug where it would output // an invalid tar file in certain rare situations because the logic // incorrectly believed that the old GNU format had a prefix field. // This is wrong and leads to an output file that mangles the // atime and ctime fields, which are often left unused. // // In order to continue reading tar files created by former, buggy // versions of Go, we skeptically parse the atime and ctime fields. // If we are unable to parse them and the prefix field looks like // an ASCII string, then we fallback on the pre-Go1.8 behavior // of treating these fields as the USTAR prefix field. // // Note that this will not use the fallback logic for all possible // files generated by a pre-Go1.8 toolchain. If the generated file // happened to have a prefix field that parses as valid // atime and ctime fields (e.g., when they are valid octal strings), // then it is impossible to distinguish between a valid GNU file // and an invalid pre-Go1.8 file. 
// // See https://golang.org/issues/12594 // See https://golang.org/issues/21005if .err != nil { .AccessTime, .ChangeTime = time.Time{}, time.Time{} := .blk.toUSTAR()if := .parseString(.prefix()); isASCII() { = } .Format = FormatUnknown// Buggy file is not GNU } }iflen() > 0 { .Name = + "/" + .Name } }return , &.blk, .err}// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.// The sparse map is stored in the tar header if it's small enough.// If it's larger than four entries, then one or more extension headers are used// to store the rest of the sparse map.//// The Header.Size does not reflect the size of any extended headers used.// Thus, this function will read from the raw io.Reader to fetch extra headers.// This method mutates blk in the process.func ( *Reader) ( *Header, *block) (sparseDatas, error) {// Make sure that the input format is GNU. // Unfortunately, the STAR format also has a sparse header format that uses // the same type flag but has a completely different layout.if .getFormat() != FormatGNU {returnnil, ErrHeader } .Format.mayOnlyBe(FormatGNU)varparser .Size = .parseNumeric(.toGNU().realSize())if .err != nil {returnnil, .err } := .toGNU().sparse() := make(sparseDatas, 0, .maxEntries())for {for := 0; < .maxEntries(); ++ {// This termination condition is identical to GNU and BSD tar.if .entry().offset()[0] == 0x00 {break// Don't return, need to process extended headers (even if empty) } := .parseNumeric(.entry().offset()) := .parseNumeric(.entry().length())if .err != nil {returnnil, .err } = append(, sparseEntry{Offset: , Length: }) }if .isExtended()[0] > 0 {// There are more entries. Read an extension header and parse its entries.if , := mustReadFull(.r, [:]); != nil {returnnil, } = .toSparse()continue }return , nil// Done }}// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format// version 1.0. The format of the sparse map consists of a series of// newline-terminated numeric fields. 
The first field is the number of entries// and is always present. Following this are the entries, consisting of two// fields (offset, length). This function must stop reading at the end// boundary of the block containing the last newline.//// Note that the GNU manual says that numeric values should be encoded in octal// format. However, the GNU tar utility itself outputs these values in decimal.// As such, this library treats values as being encoded in decimal.func readGNUSparseMap1x0( io.Reader) (sparseDatas, error) {var (int64bytes.Bufferblock )// feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. := func( int64) error {for < {if , := mustReadFull(, [:]); != nil {return } .Write([:])for , := range {if == '\n' { ++ } } }returnnil }// nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. := func() string { -- , := .ReadString('\n')returnstrings.TrimRight(, "\n") }// Parse for the number of entries. // Use integer overflow resistant math to check this.if := (1); != nil {returnnil, } , := strconv.ParseInt((), 10, 0) // Intentionally parse as native intif != nil || < 0 || int(2*) < int() {returnnil, ErrHeader }// Parse for all member entries. // numEntries is trusted after this since a potential attacker must have // committed resources proportional to what this library used.if := (2 * ); != nil {returnnil, } := make(sparseDatas, 0, )for := int64(0); < ; ++ { , := strconv.ParseInt((), 10, 64) , := strconv.ParseInt((), 10, 64)if != nil || != nil {returnnil, ErrHeader } = append(, sparseEntry{Offset: , Length: }) }return , nil}// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format// version 0.1. The sparse map is stored in the PAX headers.func readGNUSparseMap0x1( map[string]string) (sparseDatas, error) {// Get number of entries. // Use integer overflow resistant math to check this. 
:= [paxGNUSparseNumBlocks] , := strconv.ParseInt(, 10, 0) // Intentionally parse as native intif != nil || < 0 || int(2*) < int() {returnnil, ErrHeader }// There should be two numbers in sparseMap for each entry. := strings.Split([paxGNUSparseMap], ",")iflen() == 1 && [0] == "" { = [:0] }ifint64(len()) != 2* {returnnil, ErrHeader }// Loop through the entries in the sparse map. // numEntries is trusted now. := make(sparseDatas, 0, )forlen() >= 2 { , := strconv.ParseInt([0], 10, 64) , := strconv.ParseInt([1], 10, 64)if != nil || != nil {returnnil, ErrHeader } = append(, sparseEntry{Offset: , Length: }) = [2:] }return , nil}// Read reads from the current file in the tar archive.// It returns (0, io.EOF) when it reaches the end of that file,// until [Next] is called to advance to the next file.//// If the current file is sparse, then the regions marked as a hole// are read back as NUL-bytes.//// Calling Read on special types like [TypeLink], [TypeSymlink], [TypeChar],// [TypeBlock], [TypeDir], and [TypeFifo] returns (0, [io.EOF]) regardless of what// the [Header.Size] claims.func ( *Reader) ( []byte) (int, error) {if .err != nil {return0, .err } , := .curr.Read()if != nil && != io.EOF { .err = }return , }// writeTo writes the content of the current file to w.// The bytes written matches the number of remaining bytes in the current file.//// If the current file is sparse and w is an io.WriteSeeker,// then writeTo uses Seek to skip past holes defined in Header.SparseHoles,// assuming that skipped regions are filled with NULs.// This always writes the last byte to ensure w is the right size.//// TODO(dsnet): Re-export this when adding sparse file support.// See https://golang.org/issue/22735func ( *Reader) ( io.Writer) (int64, error) {if .err != nil {return0, .err } , := .curr.WriteTo()if != nil { .err = }return , }// regFileReader is a fileReader for reading data from a regular file entry.type regFileReader struct { r io.Reader// Underlying Reader nb int64// Number of 
remaining bytes to read}func ( *regFileReader) ( []byte) ( int, error) {ifint64(len()) > .nb { = [:.nb] }iflen() > 0 { , = .r.Read() .nb -= int64() }switch {case == io.EOF && .nb > 0:return , io.ErrUnexpectedEOFcase == nil && .nb == 0:return , io.EOFdefault:return , }}func ( *regFileReader) ( io.Writer) (int64, error) {returnio.Copy(, struct{ io.Reader }{})}// logicalRemaining implements fileState.logicalRemaining.func ( regFileReader) () int64 {return .nb}// physicalRemaining implements fileState.physicalRemaining.func ( regFileReader) () int64 {return .nb}// sparseFileReader is a fileReader for reading data from a sparse file entry.type sparseFileReader struct { fr fileReader// Underlying fileReader sp sparseHoles// Normalized list of sparse holes pos int64// Current position in sparse file}func ( *sparseFileReader) ( []byte) ( int, error) { := int64(len()) >= .logicalRemaining()if { = [:.logicalRemaining()] } := := .pos + int64(len())for > .pos && == nil {varint// Bytes read in fragment , := .sp[0].Offset, .sp[0].endOffset()if .pos < { // In a data fragment := [:min(int64(len()), -.pos)] , = tryReadFull(.fr, ) } else { // In a hole fragment := [:min(int64(len()), -.pos)] , = tryReadFull(zeroReader{}, ) } = [:] .pos += int64()if .pos >= && len(.sp) > 1 { .sp = .sp[1:] // Ensure last fragment always remains } } = len() - len()switch {case == io.EOF:return , errMissData// Less data in dense file than sparse filecase != nil:return , case .logicalRemaining() == 0 && .physicalRemaining() > 0:return , errUnrefData// More data in dense file than sparse filecase :return , io.EOFdefault:return , nil }}func ( *sparseFileReader) ( io.Writer) ( int64, error) { , := .(io.WriteSeeker)if {if , := .Seek(0, io.SeekCurrent); != nil { = false// Not all io.Seeker can really seek } }if ! {returnio.Copy(, struct{ io.Reader }{}) }varbool := .posfor .logicalRemaining() > 0 && ! 
&& == nil {varint64// Size of fragment , := .sp[0].Offset, .sp[0].endOffset()if .pos < { // In a data fragment = - .pos , = io.CopyN(, .fr, ) } else { // In a hole fragment = - .posif .physicalRemaining() == 0 { = true -- } _, = .Seek(, io.SeekCurrent) } .pos += if .pos >= && len(.sp) > 1 { .sp = .sp[1:] // Ensure last fragment always remains } }// If the last fragment is a hole, then seek to 1-byte before EOF, and // write a single byte to ensure the file is the right size.if && == nil { _, = .Write([]byte{0}) .pos++ } = .pos - switch {case == io.EOF:return , errMissData// Less data in dense file than sparse filecase != nil:return , case .logicalRemaining() == 0 && .physicalRemaining() > 0:return , errUnrefData// More data in dense file than sparse filedefault:return , nil }}func ( sparseFileReader) () int64 {return .sp[len(.sp)-1].endOffset() - .pos}func ( sparseFileReader) () int64 {return .fr.physicalRemaining()}type zeroReader struct{}func (zeroReader) ( []byte) (int, error) {clear()returnlen(), nil}// mustReadFull is like io.ReadFull except it returns// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.func mustReadFull( io.Reader, []byte) (int, error) { , := tryReadFull(, )if == io.EOF { = io.ErrUnexpectedEOF }return , }// tryReadFull is like io.ReadFull except it returns// io.EOF when it is hit before len(b) bytes are read.func tryReadFull( io.Reader, []byte) ( int, error) {forlen() > && == nil {varint , = .Read([:]) += }iflen() == && == io.EOF { = nil }return , }// readSpecialFile is like io.ReadAll except it returns// ErrFieldTooLong if more than maxSpecialFileSize is read.func readSpecialFile( io.Reader) ([]byte, error) { , := io.ReadAll(io.LimitReader(, maxSpecialFileSize+1))iflen() > maxSpecialFileSize {returnnil, ErrFieldTooLong }return , }// discard skips n bytes in r, reporting an error if unable to do so.func discard( io.Reader, int64) error {// If possible, Seek to the last byte before the end of the data section. 
// Do this because Seek is often lazy about reporting errors; this will mask // the fact that the stream may be truncated. We can rely on the // io.CopyN done shortly afterwards to trigger any IO errors.varint64// Number of bytes skipped via Seekif , := .(io.Seeker); && > 1 {// Not all io.Seeker can actually Seek. For example, os.Stdin implements // io.Seeker, but calling Seek always returns an error and performs // no action. Thus, we try an innocent seek to the current position // to see if Seek is really supported. , := .Seek(0, io.SeekCurrent)if >= 0 && == nil {// Seek seems supported, so perform the real Seek. , := .Seek(-1, io.SeekCurrent)if < 0 || != nil {return } = - } } , := io.CopyN(io.Discard, , -)if == io.EOF && + < { = io.ErrUnexpectedEOF }return}
The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.