// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package tar implements access to tar archives.
//
// Tape archives (tar) are a file format for storing a sequence of files that
// can be read and written in a streaming manner.
// This package aims to cover most variations of the format,
// including those produced by GNU and BSD tar tools.
package tar

import (
	"errors"
	"fmt"
	"internal/godebug"
	"io/fs"
	"math"
	"path"
	"reflect"
	"strconv"
	"strings"
	"time"
)

// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
// architectures. If a large value is encountered when decoding, the result
// stored in Header will be the truncated version.

// tarinsecurepath is the GODEBUG setting named "tarinsecurepath".
var tarinsecurepath = godebug.New("tarinsecurepath")

// Sentinel errors reported by this package.
var (
	ErrHeader          = errors.New("archive/tar: invalid tar header")
	ErrWriteTooLong    = errors.New("archive/tar: write too long")
	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
	ErrInsecurePath    = errors.New("archive/tar: insecure file path")
	errMissData        = errors.New("archive/tar: sparse file references non-existent data")
	errUnrefData       = errors.New("archive/tar: sparse file contains unreferenced data")
	errWriteHole       = errors.New("archive/tar: write non-NUL byte in sparse hole")
)

// headerError is a list of reasons why a header could not be encoded.
// Empty strings in the list are ignored when the message is built.
type headerError []string

// Error returns a fixed prefix followed by the non-empty reasons joined
// with "; and ". If there are no non-empty reasons, only the prefix is returned.
func (he headerError) Error() string {
	const prefix = "archive/tar: cannot encode header"
	var ss []string
	for _, s := range he {
		if s != "" {
			ss = append(ss, s)
		}
	}
	if len(ss) == 0 {
		return prefix
	}
	return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
}

// Type flags for Header.Typeflag.
const (
	// Type '0' indicates a regular file.
	TypeReg = '0'

	// Deprecated: Use TypeReg instead.
	TypeRegA = '\x00'

	// Type '1' to '6' are header-only flags and may not have a data body.
	TypeLink    = '1' // Hard link
	TypeSymlink = '2' // Symbolic link
	TypeChar    = '3' // Character device node
	TypeBlock   = '4' // Block device node
	TypeDir     = '5' // Directory
	TypeFifo    = '6' // FIFO node

	// Type '7' is reserved.
	TypeCont = '7'

	// Type 'x' is used by the PAX format to store key-value records that
	// are only relevant to the next file.
	// This package transparently handles these types.
	TypeXHeader = 'x'

	// Type 'g' is used by the PAX format to store key-value records that
	// are relevant to all subsequent files.
	// This package only supports parsing and composing such headers,
	// but does not currently support persisting the global state across files.
	TypeXGlobalHeader = 'g'

	// Type 'S' indicates a sparse file in the GNU format.
	TypeGNUSparse = 'S'

	// Types 'L' and 'K' are used by the GNU format for a meta file
	// used to store the path or link name for the next file.
	// This package transparently handles these types.
	TypeGNULongName = 'L'
	TypeGNULongLink = 'K'
)

// Keywords for PAX extended header records.
const (
	paxNone     = "" // Indicates that no PAX key is suitable
	paxPath     = "path"
	paxLinkpath = "linkpath"
	paxSize     = "size"
	paxUid      = "uid"
	paxGid      = "gid"
	paxUname    = "uname"
	paxGname    = "gname"
	paxMtime    = "mtime"
	paxAtime    = "atime"
	paxCtime    = "ctime"   // Removed from later revision of PAX spec, but was valid
	paxCharset  = "charset" // Currently unused
	paxComment  = "comment" // Currently unused

	paxSchilyXattr = "SCHILY.xattr."

	// Keywords for GNU sparse files in a PAX extended header.
	paxGNUSparse          = "GNU.sparse."
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)

// basicKeys is a set of the PAX keys for which we have built-in support.
// This does not contain "charset" or "comment", which are both PAX-specific,
// so adding them as first-class features of Header is unlikely.
// Users can use the PAXRecords field to set it themselves.
var basicKeys = map[string]bool{
	paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true,
	paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true,
}

// A Header represents a single header in a tar archive.
// Some fields may not be populated.
// // For forward compatibility, users that retrieve a Header from Reader.Next, // mutate it in some ways, and then pass it back to Writer.WriteHeader // should do so by creating a new Header and copying the fields // that they are interested in preserving. type Header struct { // Typeflag is the type of header entry. // The zero value is automatically promoted to either TypeReg or TypeDir // depending on the presence of a trailing slash in Name. Typeflag byte Name string // Name of file entry Linkname string // Target name of link (valid for TypeLink or TypeSymlink) Size int64 // Logical file size in bytes Mode int64 // Permission and mode bits Uid int // User ID of owner Gid int // Group ID of owner Uname string // User name of owner Gname string // Group name of owner // If the Format is unspecified, then Writer.WriteHeader rounds ModTime // to the nearest second and ignores the AccessTime and ChangeTime fields. // // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. // To use sub-second resolution, specify the Format as PAX. ModTime time.Time // Modification time AccessTime time.Time // Access time (requires either PAX or GNU support) ChangeTime time.Time // Change time (requires either PAX or GNU support) Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) // Xattrs stores extended attributes as PAX records under the // "SCHILY.xattr." namespace. // // The following are semantically equivalent: // h.Xattrs[key] = value // h.PAXRecords["SCHILY.xattr."+key] = value // // When Writer.WriteHeader is called, the contents of Xattrs will take // precedence over those in PAXRecords. // // Deprecated: Use PAXRecords instead. Xattrs map[string]string // PAXRecords is a map of PAX extended header records. 
// // User-defined records should have keys of the following form: // VENDOR.keyword // Where VENDOR is some namespace in all uppercase, and keyword may // not contain the '=' character (e.g., "GOLANG.pkg.version"). // The key and value should be non-empty UTF-8 strings. // // When Writer.WriteHeader is called, PAX records derived from the // other fields in Header take precedence over PAXRecords. PAXRecords map[string]string // Format specifies the format of the tar header. // // This is set by Reader.Next as a best-effort guess at the format. // Since the Reader liberally reads some non-compliant files, // it is possible for this to be FormatUnknown. // // If the format is unspecified when Writer.WriteHeader is called, // then it uses the first format (in the order of USTAR, PAX, GNU) // capable of encoding this Header (see Format). Format Format } // sparseEntry represents a Length-sized fragment at Offset in the file. type sparseEntry struct{ Offset, Length int64 } func ( sparseEntry) () int64 { return .Offset + .Length } // A sparse file can be represented as either a sparseDatas or a sparseHoles. // As long as the total size is known, they are equivalent and one can be // converted to the other form and back. The various tar formats with sparse // file support represent sparse files in the sparseDatas form. That is, they // specify the fragments in the file that has data, and treat everything else as // having zero bytes. As such, the encoding and decoding logic in this package // deals with sparseDatas. // // However, the external API uses sparseHoles instead of sparseDatas because the // zero value of sparseHoles logically represents a normal file (i.e., there are // no holes in it). On the other hand, the zero value of sparseDatas implies // that the file has no data in it, which is rather odd. 
// // As an example, if the underlying raw file contains the 10-byte data: // // var compactFile = "abcdefgh" // // And the sparse map has the following entries: // // var spd sparseDatas = []sparseEntry{ // {Offset: 2, Length: 5}, // Data fragment for 2..6 // {Offset: 18, Length: 3}, // Data fragment for 18..20 // } // var sph sparseHoles = []sparseEntry{ // {Offset: 0, Length: 2}, // Hole fragment for 0..1 // {Offset: 7, Length: 11}, // Hole fragment for 7..17 // {Offset: 21, Length: 4}, // Hole fragment for 21..24 // } // // Then the content of the resulting sparse file with a Header.Size of 25 is: // // var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 type ( sparseDatas []sparseEntry sparseHoles []sparseEntry ) // validateSparseEntries reports whether sp is a valid sparse map. // It does not matter whether sp represents data fragments or hole fragments. func validateSparseEntries( []sparseEntry, int64) bool { // Validate all sparse entries. These are the same checks as performed by // the BSD tar utility. if < 0 { return false } var sparseEntry for , := range { switch { case .Offset < 0 || .Length < 0: return false // Negative values are never okay case .Offset > math.MaxInt64-.Length: return false // Integer overflow with large length case .endOffset() > : return false // Region extends beyond the actual size case .endOffset() > .Offset: return false // Regions cannot overlap and must be in order } = } return true } // alignSparseEntries mutates src and returns dst where each fragment's // starting offset is aligned up to the nearest block edge, and each // ending offset is aligned down to the nearest block edge. // // Even though the Go tar Reader and the BSD tar utility can handle entries // with arbitrary offsets and lengths, the GNU tar utility can only handle // offsets and lengths that are multiples of blockSize. 
func alignSparseEntries( []sparseEntry, int64) []sparseEntry { := [:0] for , := range { , := .Offset, .endOffset() += blockPadding(+) // Round-up to nearest blockSize if != { -= blockPadding(-) // Round-down to nearest blockSize } if < { = append(, sparseEntry{Offset: , Length: - }) } } return } // invertSparseEntries converts a sparse map from one form to the other. // If the input is sparseHoles, then it will output sparseDatas and vice-versa. // The input must have been already validated. // // This function mutates src and returns a normalized map where: // - adjacent fragments are coalesced together // - only the last fragment may be empty // - the endOffset of the last fragment is the total size func invertSparseEntries( []sparseEntry, int64) []sparseEntry { := [:0] var sparseEntry for , := range { if .Length == 0 { continue // Skip empty fragments } .Length = .Offset - .Offset if .Length > 0 { = append(, ) // Only add non-empty fragments } .Offset = .endOffset() } .Length = - .Offset // Possibly the only empty fragment return append(, ) } // fileState tracks the number of logical (includes sparse holes) and physical // (actual in tar archive) bytes remaining for the current file. // // Invariant: logicalRemaining >= physicalRemaining type fileState interface { logicalRemaining() int64 physicalRemaining() int64 } // allowedFormats determines which formats can be used. // The value returned is the logical OR of multiple possible formats. // If the value is FormatUnknown, then the input Header cannot be encoded // and an error is returned explaining why. // // As a by-product of checking the fields, this function returns paxHdrs, which // contain all fields that could not be directly encoded. // A value receiver ensures that this method does not mutate the source Header. 
func ( Header) () ( Format, map[string]string, error) { = FormatUSTAR | FormatPAX | FormatGNU = make(map[string]string) var , , string var bool // Prefer PAX over USTAR := func( string, int, , string) { // NUL-terminator is optional for path and linkpath. // Technically, it is required for uname and gname, // but neither GNU nor BSD tar checks for it. := len() > := == paxPath || == paxLinkpath if hasNUL() || ( && !) { = fmt.Sprintf("GNU cannot encode %s=%q", , ) .mustNotBe(FormatGNU) } if !isASCII() || { := == paxPath if , , := splitUSTARPath(); ! || ! { = fmt.Sprintf("USTAR cannot encode %s=%q", , ) .mustNotBe(FormatUSTAR) } if == paxNone { = fmt.Sprintf("PAX cannot encode %s=%q", , ) .mustNotBe(FormatPAX) } else { [] = } } if , := .PAXRecords[]; && == { [] = } } := func( int64, int, , string) { if !fitsInBase256(, ) { = fmt.Sprintf("GNU cannot encode %s=%d", , ) .mustNotBe(FormatGNU) } if !fitsInOctal(, ) { = fmt.Sprintf("USTAR cannot encode %s=%d", , ) .mustNotBe(FormatUSTAR) if == paxNone { = fmt.Sprintf("PAX cannot encode %s=%d", , ) .mustNotBe(FormatPAX) } else { [] = strconv.FormatInt(, 10) } } if , := .PAXRecords[]; && == strconv.FormatInt(, 10) { [] = } } := func( time.Time, int, , string) { if .IsZero() { return // Always okay } if !fitsInBase256(, .Unix()) { = fmt.Sprintf("GNU cannot encode %s=%v", , ) .mustNotBe(FormatGNU) } := == paxMtime := fitsInOctal(, .Unix()) if ( && !) || ! { = fmt.Sprintf("USTAR cannot encode %s=%v", , ) .mustNotBe(FormatUSTAR) } := .Nanosecond() != 0 if ! || ! || { = true // USTAR may truncate sub-second measurements if == paxNone { = fmt.Sprintf("PAX cannot encode %s=%v", , ) .mustNotBe(FormatPAX) } else { [] = formatPAXTime() } } if , := .PAXRecords[]; && == formatPAXTime() { [] = } } // Check basic fields. 
var block := .toV7() := .toUSTAR() := .toGNU() (.Name, len(.name()), "Name", paxPath) (.Linkname, len(.linkName()), "Linkname", paxLinkpath) (.Uname, len(.userName()), "Uname", paxUname) (.Gname, len(.groupName()), "Gname", paxGname) (.Mode, len(.mode()), "Mode", paxNone) (int64(.Uid), len(.uid()), "Uid", paxUid) (int64(.Gid), len(.gid()), "Gid", paxGid) (.Size, len(.size()), "Size", paxSize) (.Devmajor, len(.devMajor()), "Devmajor", paxNone) (.Devminor, len(.devMinor()), "Devminor", paxNone) (.ModTime, len(.modTime()), "ModTime", paxMtime) (.AccessTime, len(.accessTime()), "AccessTime", paxAtime) (.ChangeTime, len(.changeTime()), "ChangeTime", paxCtime) // Check for header-only types. var , string switch .Typeflag { case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: // Exclude TypeLink and TypeSymlink, since they may reference directories. if strings.HasSuffix(.Name, "/") { return FormatUnknown, nil, headerError{"filename may not have trailing slash"} } case TypeXHeader, TypeGNULongName, TypeGNULongLink: return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} case TypeXGlobalHeader: := Header{Name: .Name, Typeflag: .Typeflag, Xattrs: .Xattrs, PAXRecords: .PAXRecords, Format: .Format} if !reflect.DeepEqual(, ) { return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"} } = "only PAX supports TypeXGlobalHeader" .mayOnlyBe(FormatPAX) } if !isHeaderOnlyType(.Typeflag) && .Size < 0 { return FormatUnknown, nil, headerError{"negative size on header-only type"} } // Check PAX records. 
if len(.Xattrs) > 0 { for , := range .Xattrs { [paxSchilyXattr+] = } = "only PAX supports Xattrs" .mayOnlyBe(FormatPAX) } if len(.PAXRecords) > 0 { for , := range .PAXRecords { switch , := []; { case : continue // Do not overwrite existing records case .Typeflag == TypeXGlobalHeader: [] = // Copy all records case !basicKeys[] && !strings.HasPrefix(, paxGNUSparse): [] = // Ignore local records that may conflict } } = "only PAX supports PAXRecords" .mayOnlyBe(FormatPAX) } for , := range { if !validPAXRecord(, ) { return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", +" = "+)} } } // TODO(dsnet): Re-enable this when adding sparse support. // See https://golang.org/issue/22735 /* // Check sparse files. if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { if isHeaderOnlyType(h.Typeflag) { return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} } if !validateSparseEntries(h.SparseHoles, h.Size) { return FormatUnknown, nil, headerError{"invalid sparse holes"} } if h.Typeflag == TypeGNUSparse { whyOnlyGNU = "only GNU supports TypeGNUSparse" format.mayOnlyBe(FormatGNU) } else { whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" format.mustNotBe(FormatGNU) } whyNoUSTAR = "USTAR does not support sparse files" format.mustNotBe(FormatUSTAR) } */ // Check desired format. if := .Format; != FormatUnknown { if .has(FormatPAX) && ! { .mayBe(FormatUSTAR) // PAX implies USTAR allowed too } .mayOnlyBe() // Set union of formats allowed and format wanted } if == FormatUnknown { switch .Format { case FormatUSTAR: = headerError{"Format specifies USTAR", , , } case FormatPAX: = headerError{"Format specifies PAX", , } case FormatGNU: = headerError{"Format specifies GNU", , } default: = headerError{, , , , } } } return , , } // FileInfo returns an fs.FileInfo for the Header. func ( *Header) () fs.FileInfo { return headerFileInfo{} } // headerFileInfo implements fs.FileInfo. 
type headerFileInfo struct { h *Header } func ( headerFileInfo) () int64 { return .h.Size } func ( headerFileInfo) () bool { return .Mode().IsDir() } func ( headerFileInfo) () time.Time { return .h.ModTime } func ( headerFileInfo) () any { return .h } // Name returns the base name of the file. func ( headerFileInfo) () string { if .IsDir() { return path.Base(path.Clean(.h.Name)) } return path.Base(.h.Name) } // Mode returns the permission and mode bits for the headerFileInfo. func ( headerFileInfo) () ( fs.FileMode) { // Set file permission bits. = fs.FileMode(.h.Mode).Perm() // Set setuid, setgid and sticky bits. if .h.Mode&c_ISUID != 0 { |= fs.ModeSetuid } if .h.Mode&c_ISGID != 0 { |= fs.ModeSetgid } if .h.Mode&c_ISVTX != 0 { |= fs.ModeSticky } // Set file mode bits; clear perm, setuid, setgid, and sticky bits. switch := fs.FileMode(.h.Mode) &^ 07777; { case c_ISDIR: |= fs.ModeDir case c_ISFIFO: |= fs.ModeNamedPipe case c_ISLNK: |= fs.ModeSymlink case c_ISBLK: |= fs.ModeDevice case c_ISCHR: |= fs.ModeDevice |= fs.ModeCharDevice case c_ISSOCK: |= fs.ModeSocket } switch .h.Typeflag { case TypeSymlink: |= fs.ModeSymlink case TypeChar: |= fs.ModeDevice |= fs.ModeCharDevice case TypeBlock: |= fs.ModeDevice case TypeDir: |= fs.ModeDir case TypeFifo: |= fs.ModeNamedPipe } return } func ( headerFileInfo) () string { return fs.FormatFileInfo() } // sysStat, if non-nil, populates h from system-dependent fields of fi. var sysStat func(fi fs.FileInfo, h *Header, doNameLookups bool) error const ( // Mode constants from the USTAR spec: // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 c_ISUID = 04000 // Set uid c_ISGID = 02000 // Set gid c_ISVTX = 01000 // Save text (sticky bit) // Common Unix mode constants; these are not defined in any common tar standard. // Header.FileInfo understands these, but FileInfoHeader will never produce these. 
c_ISDIR = 040000 // Directory c_ISFIFO = 010000 // FIFO c_ISREG = 0100000 // Regular file c_ISLNK = 0120000 // Symbolic link c_ISBLK = 060000 // Block special file c_ISCHR = 020000 // Character special file c_ISSOCK = 0140000 // Socket ) // FileInfoHeader creates a partially-populated [Header] from fi. // If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a directory, a slash is appended to the name. // // Since fs.FileInfo's Name method only returns the base name of // the file it describes, it may be necessary to modify Header.Name // to provide the full path name of the file. // // If fi implements [FileInfoNames] // Header.Gname and Header.Uname // are provided by the methods of the interface. func ( fs.FileInfo, string) (*Header, error) { if == nil { return nil, errors.New("archive/tar: FileInfo is nil") } := .Mode() := &Header{ Name: .Name(), ModTime: .ModTime(), Mode: int64(.Perm()), // or'd with c_IS* constants later } switch { case .IsRegular(): .Typeflag = TypeReg .Size = .Size() case .IsDir(): .Typeflag = TypeDir .Name += "/" case &fs.ModeSymlink != 0: .Typeflag = TypeSymlink .Linkname = case &fs.ModeDevice != 0: if &fs.ModeCharDevice != 0 { .Typeflag = TypeChar } else { .Typeflag = TypeBlock } case &fs.ModeNamedPipe != 0: .Typeflag = TypeFifo case &fs.ModeSocket != 0: return nil, fmt.Errorf("archive/tar: sockets not supported") default: return nil, fmt.Errorf("archive/tar: unknown file mode %v", ) } if &fs.ModeSetuid != 0 { .Mode |= c_ISUID } if &fs.ModeSetgid != 0 { .Mode |= c_ISGID } if &fs.ModeSticky != 0 { .Mode |= c_ISVTX } // If possible, populate additional fields from OS-specific // FileInfo fields. if , := .Sys().(*Header); { // This FileInfo came from a Header (not the OS). Use the // original Header to populate all remaining fields. 
.Uid = .Uid .Gid = .Gid .Uname = .Uname .Gname = .Gname .AccessTime = .AccessTime .ChangeTime = .ChangeTime if .Xattrs != nil { .Xattrs = make(map[string]string) for , := range .Xattrs { .Xattrs[] = } } if .Typeflag == TypeLink { // hard link .Typeflag = TypeLink .Size = 0 .Linkname = .Linkname } if .PAXRecords != nil { .PAXRecords = make(map[string]string) for , := range .PAXRecords { .PAXRecords[] = } } } var = true if , := .(FileInfoNames); { = false var error .Gname, = .Gname() if != nil { return nil, } .Uname, = .Uname() if != nil { return nil, } } if sysStat != nil { return , sysStat(, , ) } return , nil } // FileInfoNames extends [fs.FileInfo]. // Passing an instance of this to [FileInfoHeader] permits the caller // to avoid a system-dependent name lookup by specifying the Uname and Gname directly. type FileInfoNames interface { fs.FileInfo // Uname should give a user name. Uname() (string, error) // Gname should give a group name. Gname() (string, error) } // isHeaderOnlyType checks if the given type flag is of the type that has no // data section even if a size is specified. func isHeaderOnlyType( byte) bool { switch { case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: return true default: return false } }