// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package tar

import "strings"

// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with their own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The table's upper portion shows the [Header] fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
//
// The Writer currently provides no support for sparse files.
type Format int

// Constants to identify various tar formats.
const (
	// Deliberately hide the meaning of constants from public API.
	_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...

	// FormatUnknown indicates that the format is unknown.
	FormatUnknown

	// The format of the original Unix V7 tar tool prior to standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// Schily's tar format, which is incompatible with USTAR.
	// This does not cover STAR extensions to the PAX format; these fall under
	// the PAX format.
	formatSTAR

	// formatMax is one bit past the last defined format; it bounds the
	// bit-by-bit iteration performed when a Format is converted to a string.
	formatMax
)

// Bit-set helpers. Every format constant above occupies its own bit, so a
// Format value can hold a set of candidate formats.
//
//   - has reports whether f and f2 have at least one bit in common.
//   - mayBe adds the bits of f2 to *f.
//   - mayOnlyBe restricts *f to the bits it shares with f2.
//   - mustNotBe clears the bits of f2 from *f.
//
// NOTE(review): only the name "has" is visible at a call site in this file;
// mayBe, mayOnlyBe and mustNotBe were restored from the operators they wrap
// and the surviving column alignment — confirm against callers.
func (f Format) has(f2 Format) bool   { return f&f2 != 0 }
func (f *Format) mayBe(f2 Format)     { *f |= f2 }
func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }

// formatNames maps each single-bit format to its display name.
var formatNames = map[Format]string{
	formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
}

// String returns a human-readable description of the formats set in f:
// "<unknown>" when no bit is set, the bare name when exactly one bit is set,
// and a parenthesized " | "-separated list (lowest bit first) otherwise.
func (f Format) String() string {
	var ss []string
	// Walk every defined bit from lowest to highest and collect the names
	// of those present in f.
	for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
		if f.has(f2) {
			ss = append(ss, formatNames[f2])
		}
	}
	switch len(ss) {
	case 0:
		return "<unknown>"
	case 1:
		return ss[0]
	default:
		return "(" + strings.Join(ss, " | ") + ")"
	}
}

// Magic, version, and trailer strings used to tell header formats apart.
const (
	// GNU headers carry this magic/version pair.
	magicGNU   = "ustar "
	versionGNU = " \x00"

	// USTAR-style headers carry this magic/version pair.
	magicUSTAR   = "ustar\x00"
	versionUSTAR = "00"

	// Trailer value checked together with magicUSTAR to recognize STAR headers.
	trailerSTAR = "tar\x00"
)

// Sizes taken from the various tar specifications.
const (
	// blockSize is the size of each block in a tar stream.
	blockSize = 512

	// nameSize is the max length of the name field in USTAR format.
	nameSize = 100

	// prefixSize is the max length of the prefix field in USTAR format.
	prefixSize = 155

	// maxSpecialFileSize is the max length of a special file
	// (PAX header, GNU long name or link).
	// This matches the limit used by libarchive.
	maxSpecialFileSize = 1 << 20
)

// blockPadding computes the number of bytes needed to pad offset up to the
// nearest block edge where 0 <= n < blockSize.
//
// The result is 0 when offset is already a multiple of blockSize.
func blockPadding(offset int64) (n int64) {
	// blockSize is a power of two, so masking the negated offset with
	// blockSize-1 yields the distance to the next multiple of blockSize.
	return -offset & (blockSize - 1)
}

var zeroBlock block

type block [blockSize]byte

// Convert block to any number of formats.
func ( *block) () *headerV7       { return (*headerV7)() }
func ( *block) () *headerGNU     { return (*headerGNU)() }
func ( *block) () *headerSTAR   { return (*headerSTAR)() }
func ( *block) () *headerUSTAR { return (*headerUSTAR)() }
func ( *block) () sparseArray { return sparseArray([:]) }

// getFormat checks that the block is a valid tar header based on the checksum.
// It then attempts to guess the specific format based on magic values.
// If the checksum fails, then FormatUnknown is returned.
func ( *block) () Format {
	// Verify checksum.
	var  parser
	 := .parseOctal(.toV7().chksum())
	,  := .computeChecksum()
	if .err != nil || ( !=  &&  != ) {
		return FormatUnknown
	}

	// Guess the magic values.
	 := string(.toUSTAR().magic())
	 := string(.toUSTAR().version())
	 := string(.toSTAR().trailer())
	switch {
	case  == magicUSTAR &&  == trailerSTAR:
		return formatSTAR
	case  == magicUSTAR:
		return FormatUSTAR | FormatPAX
	case  == magicGNU &&  == versionGNU:
		return FormatGNU
	default:
		return formatV7
	}
}

// setFormat writes the magic values necessary for specified format
// and then updates the checksum accordingly.
func ( *block) ( Format) {
	// Set the magic values.
	switch {
	case .has(formatV7):
		// Do nothing.
	case .has(FormatGNU):
		copy(.toGNU().magic(), magicGNU)
		copy(.toGNU().version(), versionGNU)
	case .has(formatSTAR):
		copy(.toSTAR().magic(), magicUSTAR)
		copy(.toSTAR().version(), versionUSTAR)
		copy(.toSTAR().trailer(), trailerSTAR)
	case .has(FormatUSTAR | FormatPAX):
		copy(.toUSTAR().magic(), magicUSTAR)
		copy(.toUSTAR().version(), versionUSTAR)
	default:
		panic("invalid format")
	}

	// Update checksum.
	// This field is special in that it is terminated by a NULL then space.
	var  formatter
	 := .toV7().chksum()
	,  := .computeChecksum() // Possible values are 256..128776
	.formatOctal([:7], ) // Never fails since 128776 < 262143
	[7] = ' '
}

// computeChecksum computes the checksum for the header block.
// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
// signed byte values.
// We compute and return both.
func ( *block) () (,  int64) {
	for ,  := range  {
		if 148 <=  &&  < 156 {
			 = ' ' // Treat the checksum field itself as all spaces.
		}
		 += int64()
		 += int64(int8())
	}
	return , 
}

// reset clears the block with all zeros.
func ( *block) () {
	* = block{}
}

type headerV7 [blockSize]byte

func ( *headerV7) () []byte     { return [000:][:100] }
func ( *headerV7) () []byte     { return [100:][:8] }
func ( *headerV7) () []byte      { return [108:][:8] }
func ( *headerV7) () []byte      { return [116:][:8] }
func ( *headerV7) () []byte     { return [124:][:12] }
func ( *headerV7) () []byte  { return [136:][:12] }
func ( *headerV7) () []byte   { return [148:][:8] }
func ( *headerV7) () []byte { return [156:][:1] }
func ( *headerV7) () []byte { return [157:][:100] }

type headerGNU [blockSize]byte

func ( *headerGNU) () *headerV7       { return (*headerV7)() }
func ( *headerGNU) () []byte       { return [257:][:6] }
func ( *headerGNU) () []byte     { return [263:][:2] }
func ( *headerGNU) () []byte    { return [265:][:32] }
func ( *headerGNU) () []byte   { return [297:][:32] }
func ( *headerGNU) () []byte    { return [329:][:8] }
func ( *headerGNU) () []byte    { return [337:][:8] }
func ( *headerGNU) () []byte  { return [345:][:12] }
func ( *headerGNU) () []byte  { return [357:][:12] }
func ( *headerGNU) () sparseArray { return sparseArray([386:][:24*4+1]) }
func ( *headerGNU) () []byte    { return [483:][:12] }

type headerSTAR [blockSize]byte

func ( *headerSTAR) () *headerV7      { return (*headerV7)() }
func ( *headerSTAR) () []byte      { return [257:][:6] }
func ( *headerSTAR) () []byte    { return [263:][:2] }
func ( *headerSTAR) () []byte   { return [265:][:32] }
func ( *headerSTAR) () []byte  { return [297:][:32] }
func ( *headerSTAR) () []byte   { return [329:][:8] }
func ( *headerSTAR) () []byte   { return [337:][:8] }
func ( *headerSTAR) () []byte     { return [345:][:131] }
func ( *headerSTAR) () []byte { return [476:][:12] }
func ( *headerSTAR) () []byte { return [488:][:12] }
func ( *headerSTAR) () []byte    { return [508:][:4] }

type headerUSTAR [blockSize]byte

func ( *headerUSTAR) () *headerV7     { return (*headerV7)() }
func ( *headerUSTAR) () []byte     { return [257:][:6] }
func ( *headerUSTAR) () []byte   { return [263:][:2] }
func ( *headerUSTAR) () []byte  { return [265:][:32] }
func ( *headerUSTAR) () []byte { return [297:][:32] }
func ( *headerUSTAR) () []byte  { return [329:][:8] }
func ( *headerUSTAR) () []byte  { return [337:][:8] }
func ( *headerUSTAR) () []byte    { return [345:][:155] }

type sparseArray []byte

func ( sparseArray) ( int) sparseElem { return sparseElem([*24:]) }
func ( sparseArray) () []byte     { return [24*.maxEntries():][:1] }
func ( sparseArray) () int        { return len() / 24 }

// sparseElem is a byte slice whose first 24 bytes hold two consecutive
// 12-byte fields. Both accessors return sub-slices aliasing those bytes.
//
// NOTE(review): the names offset (first field) and length (second field)
// were restored from the layout and column alignment — confirm against
// callers.
type sparseElem []byte

func (s sparseElem) offset() []byte { return s[00:][:12] }
func (s sparseElem) length() []byte { return s[12:][:12] }