package zip
import (
"bufio"
"encoding/binary"
"errors"
"hash"
"hash/crc32"
"internal/godebug"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"slices"
"strings"
"sync"
"time"
)
// zipinsecurepath holds the GODEBUG "zipinsecurepath" setting; when it is
// "0", the readers reject archives containing non-local names or names
// with backslashes (see Reader.init).
var zipinsecurepath = godebug .New ("zipinsecurepath" )
// Sentinel errors returned by this package's readers.
var (
// ErrFormat reports structurally invalid archive data.
ErrFormat = errors .New ("zip: not a valid zip file" )
// ErrAlgorithm reports an entry using an unregistered compression method.
ErrAlgorithm = errors .New ("zip: unsupported compression algorithm" )
// ErrChecksum reports a CRC-32 mismatch while reading an entry.
ErrChecksum = errors .New ("zip: checksum error" )
// ErrInsecurePath reports a non-local entry name; the reader is still usable.
ErrInsecurePath = errors .New ("zip: insecure file path" )
)
// Reader provides read access to the contents of a ZIP archive served
// from an io.ReaderAt. It also implements fs.FS (see Open).
type Reader struct {
r io .ReaderAt
// File lists the archive entries in central-directory order.
File []*File
// Comment is the end-of-central-directory comment text.
Comment string
// decompressors holds per-reader overrides set via RegisterDecompressor;
// nil until the first registration.
decompressors map [uint16 ]Decompressor
// baseOffset is the amount of data prepended before the archive proper
// (non-zero for e.g. self-extracting archives); entry offsets are
// adjusted by it.
baseOffset int64
// fileListOnce guards the lazy construction of fileList, the sorted
// name index used by the fs.FS methods.
fileListOnce sync .Once
fileList []fileListEntry
}
// ReadCloser is a Reader that owns the underlying *os.File and
// releases it in Close.
type ReadCloser struct {
f *os .File
Reader
}
// File describes a single entry inside a ZIP archive.
// Read the content with Open or OpenRaw.
type File struct {
FileHeader
zip *Reader
zipr io .ReaderAt
// headerOffset is the absolute offset of this entry's local file header.
headerOffset int64
// zip64 records whether a zip64 extra field was seen for this entry.
zip64 bool
}
// OpenReader opens the Zip file specified by name and returns a
// ReadCloser.
//
// If any file inside the archive has a non-local name (per
// filepath.IsLocal) or contains a backslash, and the GODEBUG setting
// zipinsecurepath=0 is in effect, OpenReader returns the reader with an
// ErrInsecurePath error; the reader remains usable.
func OpenReader(name string) (*ReadCloser, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	fi, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, err
	}
	rc := new(ReadCloser)
	err = rc.init(f, fi.Size())
	if err != nil && err != ErrInsecurePath {
		f.Close()
		return nil, err
	}
	// err is nil or ErrInsecurePath here; either way the reader is valid.
	rc.f = f
	return rc, err
}
// NewReader returns a new Reader reading from r, which is assumed to
// have the given size in bytes. Like OpenReader, it may return a usable
// reader together with ErrInsecurePath when zipinsecurepath=0 is set.
func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
	if size < 0 {
		return nil, errors.New("zip: size cannot be negative")
	}
	zr := &Reader{}
	if err := zr.init(r, size); err != nil && err != ErrInsecurePath {
		return nil, err
	} else {
		// err is nil or ErrInsecurePath; pass it through with the reader.
		return zr, err
	}
}
// init populates r from the archive stored in rdr[0:size): it locates the
// end-of-central-directory record, walks the central directory to build
// r.File, and (when GODEBUG zipinsecurepath=0) rejects archives that
// contain non-local names. It may return ErrInsecurePath while leaving
// the Reader usable.
func (r *Reader ) init (rdr io .ReaderAt , size int64 ) error {
end , baseOffset , err := readDirectoryEnd (rdr , size )
if err != nil {
return err
}
r .r = rdr
r .baseOffset = baseOffset
// The record count is attacker-controlled, so only preallocate r.File
// when the claimed count is plausible: each directory header is at
// least 30 bytes, so (available bytes)/30 bounds the real count.
if end .directorySize < uint64 (size ) && (uint64 (size )-end .directorySize )/30 >= end .directoryRecords {
r .File = make ([]*File , 0 , end .directoryRecords )
}
r .Comment = end .comment
rs := io .NewSectionReader (rdr , 0 , size )
if _, err = rs .Seek (r .baseOffset +int64 (end .directoryOffset ), io .SeekStart ); err != nil {
return err
}
buf := bufio .NewReader (rs )
// Read headers until we hit a bad one; the loop terminator errors
// (ErrFormat / unexpected EOF) are only reported below if the count
// check fails, because the stored record count is truncated to 16 bits.
for {
f := &File {zip : r , zipr : rdr }
err = readDirectoryHeader (f , buf )
if err == ErrFormat || err == io .ErrUnexpectedEOF {
break
}
if err != nil {
return err
}
f .headerOffset += r .baseOffset
r .File = append (r .File , f )
}
// Compare only 16 bits: the directory-records field wraps at 65536.
if uint16 (len (r .File )) != uint16 (end .directoryRecords ) {
return err
}
if zipinsecurepath .Value () == "0" {
for _ , f := range r .File {
if f .Name == "" {
// Zip permits an empty file name field.
continue
}
// The zip spec requires forward slashes, so treat any name that
// is non-local or contains a backslash as insecure.
if !filepath .IsLocal (f .Name ) || strings .Contains (f .Name , `\` ) {
zipinsecurepath .IncNonDefault ()
return ErrInsecurePath
}
}
}
return nil
}
// RegisterDecompressor registers or overrides a custom decompressor for
// the given method on this Reader only. If a method is not registered
// here, the package-level decompressor table is consulted instead.
func (r *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
	if r.decompressors == nil {
		r.decompressors = map[uint16]Decompressor{}
	}
	r.decompressors[method] = dcomp
}
// decompressor resolves the Decompressor for a compression method,
// preferring a per-reader registration and falling back to the
// package-level table.
func (r *Reader) decompressor(method uint16) Decompressor {
	if d, ok := r.decompressors[method]; ok && d != nil {
		return d
	}
	return decompressor(method)
}
// Close closes the Zip file, rendering it unusable for further I/O.
func (rc *ReadCloser) Close() error {
	return rc.f.Close()
}
// DataOffset returns the offset of the file's possibly-compressed data,
// relative to the beginning of the zip file. Most callers should use
// Open instead, which transparently decompresses and checksums the data.
func (f *File) DataOffset() (offset int64, err error) {
	body, err := f.findBodyOffset()
	if err != nil {
		return 0, err
	}
	return f.headerOffset + body, nil
}
// Open returns a ReadCloser that provides access to the File's
// decompressed, CRC-checked contents.
func (f *File ) Open () (io .ReadCloser , error ) {
bodyOffset , err := f .findBodyOffset ()
if err != nil {
return nil , err
}
// A directory entry (trailing slash) has no body: serve io.EOF, or
// ErrFormat if the header nonetheless claims uncompressed data.
if strings .HasSuffix (f .Name , "/" ) {
if f .UncompressedSize64 != 0 {
return &dirReader {ErrFormat }, nil
} else {
return &dirReader {io .EOF }, nil
}
}
size := int64 (f .CompressedSize64 )
r := io .NewSectionReader (f .zipr , f .headerOffset +bodyOffset , size )
dcomp := f .zip .decompressor (f .Method )
if dcomp == nil {
return nil , ErrAlgorithm
}
var rc io .ReadCloser = dcomp (r )
var desr io .Reader
if f .hasDataDescriptor () {
// The data descriptor immediately follows the compressed body.
desr = io .NewSectionReader (f .zipr , f .headerOffset +bodyOffset +size , dataDescriptorLen )
}
// Wrap the decompressor so every read updates the CRC-32, which is
// validated at EOF against the header (or the descriptor, if present).
rc = &checksumReader {
rc : rc ,
hash : crc32 .NewIEEE (),
f : f ,
desr : desr ,
}
return rc , nil
}
// OpenRaw returns a Reader over the File's compressed bytes exactly as
// stored in the archive — no decompression and no checksum validation.
func (f *File) OpenRaw() (io.Reader, error) {
	body, err := f.findBodyOffset()
	if err != nil {
		return nil, err
	}
	sz := int64(f.CompressedSize64)
	return io.NewSectionReader(f.zipr, f.headerOffset+body, sz), nil
}
// dirReader is the ReadCloser handed out for directory entries: it never
// yields data and returns a fixed error from Read (io.EOF for an empty
// directory body, ErrFormat for a malformed one).
type dirReader struct {
	err error
}

// Read always fails with the stored error; directories have no content.
func (r *dirReader) Read(_ []byte) (int, error) { return 0, r.err }

// Close is a no-op; there is nothing to release.
func (r *dirReader) Close() error { return nil }
// checksumReader wraps a decompressing ReadCloser, maintaining a CRC-32
// of the decompressed bytes and validating length and checksum when the
// stream reaches EOF (consuming the data descriptor if one follows).
type checksumReader struct {
rc io .ReadCloser
hash hash .Hash32
// nread counts decompressed bytes read so far.
nread uint64
f *File
// desr reads the trailing data descriptor; nil when the entry has none.
desr io .Reader
// err is sticky: once set, every later Read returns it.
err error
}
// Stat implements fs.File, reporting the entry's header metadata.
func (r *checksumReader) Stat() (fs.FileInfo, error) {
	info := headerFileInfo{&r.f.FileHeader}
	return info, nil
}
// Read decompresses up to len(b) bytes, folding them into the running
// CRC-32 and byte count. When the underlying stream reports io.EOF it
// validates the decompressed length and checksum, consuming the data
// descriptor when one is present. Any error is sticky.
func (r *checksumReader ) Read (b []byte ) (n int , err error ) {
if r .err != nil {
return 0 , r .err
}
n , err = r .rc .Read (b )
r .hash .Write (b [:n ])
r .nread += uint64 (n )
// More data than the header declared means a malformed archive.
if r .nread > r .f .UncompressedSize64 {
return 0 , ErrFormat
}
if err == nil {
return
}
if err == io .EOF {
if r .nread != r .f .UncompressedSize64 {
return 0 , io .ErrUnexpectedEOF
}
if r .desr != nil {
// Validate against the CRC recorded in the data descriptor; a
// short descriptor read is reported as ErrUnexpectedEOF.
if err1 := readDataDescriptor (r .desr , r .f ); err1 != nil {
if err1 == io .EOF {
err = io .ErrUnexpectedEOF
} else {
err = err1
}
} else if r .hash .Sum32 () != r .f .CRC32 {
err = ErrChecksum
}
} else {
// No data descriptor: compare against the header/TOC CRC32, but
// only if it looks like it was actually set (non-zero).
if r .f .CRC32 != 0 && r .hash .Sum32 () != r .f .CRC32 {
err = ErrChecksum
}
}
}
r .err = err
return
}
// Close closes the wrapped decompressor; the checksum state itself needs no cleanup.
func (r *checksumReader ) Close () error { return r .rc .Close () }
// findBodyOffset does the minimum work to verify the file has a local
// header and returns the body's offset relative to f.headerOffset
// (i.e. the size of the local header including its variable-length
// name and extra fields).
func (f *File) findBodyOffset() (int64, error) {
	var hdr [fileHeaderLen]byte
	if _, err := f.zipr.ReadAt(hdr[:], f.headerOffset); err != nil {
		return 0, err
	}
	b := readBuf(hdr[:])
	if b.uint32() != fileHeaderSignature {
		return 0, ErrFormat
	}
	// Jump to the name/extra length fields at the end of the fixed
	// portion of the local header.
	b = b[22:]
	nameLen := int(b.uint16())
	extraLen := int(b.uint16())
	return int64(fileHeaderLen + nameLen + extraLen), nil
}
// readDirectoryHeader reads one central-directory header from r into f.
// It returns ErrFormat if the signature is wrong and passes through read
// errors (including io.ErrUnexpectedEOF for a truncated header). It also
// parses the extra fields it understands: zip64 sizes/offset, and NTFS,
// Unix, and extended-timestamp modification times.
func readDirectoryHeader(f *File , r io .Reader ) error {
var buf [directoryHeaderLen ]byte
if _ , err := io .ReadFull (r , buf [:]); err != nil {
return err
}
b := readBuf (buf [:])
if sig := b .uint32 (); sig != directoryHeaderSignature {
return ErrFormat
}
f .CreatorVersion = b .uint16 ()
f .ReaderVersion = b .uint16 ()
f .Flags = b .uint16 ()
f .Method = b .uint16 ()
f .ModifiedTime = b .uint16 ()
f .ModifiedDate = b .uint16 ()
f .CRC32 = b .uint32 ()
f .CompressedSize = b .uint32 ()
f .UncompressedSize = b .uint32 ()
f .CompressedSize64 = uint64 (f .CompressedSize )
f .UncompressedSize64 = uint64 (f .UncompressedSize )
filenameLen := int (b .uint16 ())
extraLen := int (b .uint16 ())
commentLen := int (b .uint16 ())
// Skip four bytes of fields not surfaced by FileHeader.
b = b [4 :]
f .ExternalAttrs = b .uint32 ()
f .headerOffset = int64 (b .uint32 ())
d := make ([]byte , filenameLen +extraLen +commentLen )
if _ , err := io .ReadFull (r , d ); err != nil {
return err
}
f .Name = string (d [:filenameLen ])
f .Extra = d [filenameLen : filenameLen +extraLen ]
f .Comment = string (d [filenameLen +extraLen :])
// Determine the character encoding of Name and Comment.
utf8Valid1 , utf8Require1 := detectUTF8 (f .Name )
utf8Valid2 , utf8Require2 := detectUTF8 (f .Comment )
switch {
case !utf8Valid1 || !utf8Valid2 :
// Name and/or Comment are definitely not valid UTF-8.
f .NonUTF8 = true
case !utf8Require1 && !utf8Require2 :
// Both are plain ASCII, which overlaps every encoding.
f .NonUTF8 = false
default :
// Ambiguous: trust the header's UTF-8 flag (bit 11).
f .NonUTF8 = f .Flags &0x800 == 0
}
// 32-bit fields saturated at 0xffffffff signal zip64 values in Extra.
needUSize := f .UncompressedSize == ^uint32 (0 )
needCSize := f .CompressedSize == ^uint32 (0 )
needHeaderOffset := f .headerOffset == int64 (^uint32 (0 ))
// Best-effort parse of the Extra blob as tag/size/value records;
// malformed content is simply ignored.
var modified time .Time
parseExtras :
for extra := readBuf (f .Extra ); len (extra ) >= 4 ; {
fieldTag := extra .uint16 ()
fieldSize := int (extra .uint16 ())
if len (extra ) < fieldSize {
break
}
fieldBuf := extra .sub (fieldSize )
switch fieldTag {
case zip64ExtraID :
f .zip64 = true
// The 64-bit values appear in this fixed order, and only for
// the fields whose 32-bit counterpart was saturated.
if needUSize {
needUSize = false
if len (fieldBuf ) < 8 {
return ErrFormat
}
f .UncompressedSize64 = fieldBuf .uint64 ()
}
if needCSize {
needCSize = false
if len (fieldBuf ) < 8 {
return ErrFormat
}
f .CompressedSize64 = fieldBuf .uint64 ()
}
if needHeaderOffset {
needHeaderOffset = false
if len (fieldBuf ) < 8 {
return ErrFormat
}
f .headerOffset = int64 (fieldBuf .uint64 ())
}
case ntfsExtraID :
if len (fieldBuf ) < 4 {
continue parseExtras
}
// Skip a reserved dword, then walk tag/size attribute records.
fieldBuf .uint32 ()
for len (fieldBuf ) >= 4 {
attrTag := fieldBuf .uint16 ()
attrSize := int (fieldBuf .uint16 ())
if len (fieldBuf ) < attrSize {
continue parseExtras
}
attrBuf := fieldBuf .sub (attrSize )
if attrTag != 1 || attrSize != 24 {
// Not the 24-byte file-times attribute; skip it.
continue
}
// NTFS times are 100 ns ticks since 1601-01-01 UTC.
const ticksPerSecond = 1e7
ts := int64 (attrBuf .uint64 ())
secs := ts / ticksPerSecond
nsecs := (1e9 / ticksPerSecond ) * (ts % ticksPerSecond )
epoch := time .Date (1601 , time .January , 1 , 0 , 0 , 0 , 0 , time .UTC )
modified = time .Unix (epoch .Unix ()+secs , nsecs )
}
case unixExtraID , infoZipUnixExtraID :
if len (fieldBuf ) < 8 {
continue parseExtras
}
// Skip the first uint32 (presumably the access time); the next
// uint32 is the Unix modification time.
fieldBuf .uint32 ()
ts := int64 (fieldBuf .uint32 ())
modified = time .Unix (ts , 0 )
case extTimeExtraID :
// Flags byte: bit 0 set means a modification time follows.
if len (fieldBuf ) < 5 || fieldBuf .uint8 ()&1 == 0 {
continue parseExtras
}
ts := int64 (fieldBuf .uint32 ())
modified = time .Unix (ts , 0 )
}
}
msdosModified := msDosTimeToTime (f .ModifiedDate , f .ModifiedTime )
f .Modified = msdosModified
if !modified .IsZero () {
f .Modified = modified .UTC ()
// If the legacy MS-DOS timestamp is also set, use the delta between
// it and the extended (UTC) time to estimate the timezone offset.
if f .ModifiedTime != 0 || f .ModifiedDate != 0 {
f .Modified = modified .In (timeZone (msdosModified .Sub (modified )))
}
}
// A saturated uncompressed size without a zip64 field is tolerated:
// needUSize is deliberately excluded from the error check below.
_ = needUSize
if needCSize || needHeaderOffset {
return ErrFormat
}
return nil
}
// readDataDescriptor reads the data-descriptor record that follows a
// file body and compares its CRC-32 field against f.CRC32, returning
// ErrChecksum on mismatch.
func readDataDescriptor(r io .Reader , f *File ) error {
var buf [dataDescriptorLen ]byte
// The descriptor may optionally begin with the signature 0x08074b50,
// so read 4 bytes first and decide whether they are the signature or
// already the CRC.
if _ , err := io .ReadFull (r , buf [:4 ]); err != nil {
return err
}
off := 0
maybeSig := readBuf (buf [:4 ])
if maybeSig .uint32 () != dataDescriptorSignature {
// No signature: those 4 bytes were the CRC32 value itself.
off += 4
}
if _ , err := io .ReadFull (r , buf [off :12 ]); err != nil {
return err
}
b := readBuf (buf [:12 ])
if b .uint32 () != f .CRC32 {
return ErrChecksum
}
return nil
}
// readDirectoryEnd locates and parses the end-of-central-directory
// record near the end of the archive, chaining to the zip64 record when
// the 32-bit fields are saturated. baseOffset is the amount of data
// prepended before the archive proper (zero for a normal zip file).
func readDirectoryEnd(r io .ReaderAt , size int64 ) (dir *directoryEnd , baseOffset int64 , err error ) {
// Look for the directory-end signature in the last 1 KiB, then in the
// last 65 KiB (to allow for the maximum-length trailing comment).
var buf []byte
var directoryEndOffset int64
for i , bLen := range []int64 {1024 , 65 * 1024 } {
if bLen > size {
bLen = size
}
buf = make ([]byte , int (bLen ))
if _ , err := r .ReadAt (buf , size -bLen ); err != nil && err != io .EOF {
return nil , 0 , err
}
if p := findSignatureInBlock (buf ); p >= 0 {
buf = buf [p :]
directoryEndOffset = size - bLen + int64 (p )
break
}
if i == 1 || bLen == size {
return nil , 0 , ErrFormat
}
}
// Parse the fixed fields after the 4-byte signature.
b := readBuf (buf [4 :])
d := &directoryEnd {
diskNbr : uint32 (b .uint16 ()),
dirDiskNbr : uint32 (b .uint16 ()),
dirRecordsThisDisk : uint64 (b .uint16 ()),
directoryRecords : uint64 (b .uint16 ()),
directorySize : uint64 (b .uint32 ()),
directoryOffset : uint64 (b .uint32 ()),
commentLen : b .uint16 (),
}
l := int (d .commentLen )
if l > len (b ) {
return nil , 0 , errors .New ("zip: invalid comment length" )
}
d .comment = string (b [:l ])
// Saturated values indicate the archive may carry a zip64 directory
// end record; look for it and, if found, take the 64-bit fields.
if d .directoryRecords == 0xffff || d .directorySize == 0xffff || d .directoryOffset == 0xffffffff {
p , err := findDirectory64End (r , directoryEndOffset )
if err == nil && p >= 0 {
directoryEndOffset = p
err = readDirectory64End (r , p , d )
}
if err != nil {
return nil , 0 , err
}
}
maxInt64 := uint64 (1 <<63 - 1 )
if d .directorySize > maxInt64 || d .directoryOffset > maxInt64 {
return nil , 0 , ErrFormat
}
// Infer prepended-data length from where the directory actually ends
// versus where its recorded size and offset say it should.
baseOffset = directoryEndOffset - int64 (d .directorySize ) - int64 (d .directoryOffset )
// Make sure directoryOffset points to somewhere inside our data.
if o := baseOffset + int64 (d .directoryOffset ); o < 0 || o >= size {
return nil , 0 , ErrFormat
}
// If a positive baseOffset was inferred but a valid directory header
// parses at the recorded offset without it, trust the recorded offset
// and drop the inferred base.
if baseOffset > 0 {
off := int64 (d .directoryOffset )
rs := io .NewSectionReader (r , off , size -off )
if readDirectoryHeader (&File {}, rs ) == nil {
baseOffset = 0
}
}
return d , baseOffset , nil
}
// findDirectory64End reads the zip64 end-of-central-directory locator
// expected just before the (zip32) directory end record, and returns the
// offset of the zip64 directory end record. It returns (-1, nil) when no
// locator is present, i.e. the archive is not zip64.
func findDirectory64End(r io .ReaderAt , directoryEndOffset int64 ) (int64 , error ) {
locOffset := directoryEndOffset - directory64LocLen
if locOffset < 0 {
// No room for a locator before the start of the file.
return -1 , nil
}
buf := make ([]byte , directory64LocLen )
if _ , err := r .ReadAt (buf , locOffset ); err != nil {
return -1 , err
}
b := readBuf (buf )
if sig := b .uint32 (); sig != directory64LocSignature {
return -1 , nil
}
if b .uint32 () != 0 { // the record must be on the first disk
return -1 , nil
}
p := b .uint64 () // offset of the zip64 directory end record
if b .uint32 () != 1 { // total number of disks must be 1
return -1 , nil
}
return int64 (p ), nil
}
// readDirectory64End reads the zip64 end-of-central-directory record at
// offset and overwrites d's fields with the full 64-bit values.
func readDirectory64End(r io .ReaderAt , offset int64 , d *directoryEnd ) (err error ) {
buf := make ([]byte , directory64EndLen )
if _ , err := r .ReadAt (buf , offset ); err != nil {
return err
}
b := readBuf (buf )
if sig := b .uint32 (); sig != directory64EndSignature {
return ErrFormat
}
// Skip 12 bytes: record size (uint64) plus creator/reader versions
// (2 x uint16).
b = b [12 :]
d .diskNbr = b .uint32 ()
d .dirDiskNbr = b .uint32 ()
d .dirRecordsThisDisk = b .uint64 ()
d .directoryRecords = b .uint64 ()
d .directorySize = b .uint64 ()
d .directoryOffset = b .uint64 ()
return nil
}
// findSignatureInBlock scans b backwards for the end-of-central-directory
// signature "PK\x05\x06" and returns its offset, or -1 when it is absent
// or the record's declared comment length would run past the end of b.
func findSignatureInBlock(b []byte) int {
	for i := len(b) - directoryEndLen; i >= 0; i-- {
		if b[i] != 'P' || b[i+1] != 'K' || b[i+2] != 0x05 || b[i+3] != 0x06 {
			continue
		}
		// n is the length of the trailing archive comment (little-endian
		// uint16 stored in the record's last two fixed bytes).
		n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
		if n+directoryEndLen+i <= len(b) {
			return i
		}
		return -1
	}
	return -1
}
// readBuf is a cursor over a little-endian byte slice: each accessor
// decodes the value at the front and advances the slice past it.
// Callers are responsible for ensuring enough bytes remain.
type readBuf []byte

// uint8 consumes and returns one byte.
func (b *readBuf) uint8() uint8 {
	cur := *b
	*b = cur[1:]
	return cur[0]
}

// uint16 consumes and returns a little-endian 16-bit value.
func (b *readBuf) uint16() uint16 {
	cur := *b
	*b = cur[2:]
	return binary.LittleEndian.Uint16(cur)
}

// uint32 consumes and returns a little-endian 32-bit value.
func (b *readBuf) uint32() uint32 {
	cur := *b
	*b = cur[4:]
	return binary.LittleEndian.Uint32(cur)
}

// uint64 consumes and returns a little-endian 64-bit value.
func (b *readBuf) uint64() uint64 {
	cur := *b
	*b = cur[8:]
	return binary.LittleEndian.Uint64(cur)
}

// sub consumes the next n bytes and returns them as their own readBuf.
func (b *readBuf) sub(n int) readBuf {
	cur := *b
	*b = cur[n:]
	return cur[:n]
}
// fileListEntry is one row of the sorted name index used by the fs.FS
// methods (see initFileList).
type fileListEntry struct {
name string
// file is nil for directories synthesized from file paths.
file *File
isDir bool
// isDup marks names that occur more than once; stat fails for them.
isDup bool
}
// fileInfoDirEntry is satisfied by values usable both as fs.FileInfo and
// fs.DirEntry.
type fileInfoDirEntry interface {
fs .FileInfo
fs .DirEntry
}
// stat returns the entry's metadata, failing for duplicated names.
func (f *fileListEntry ) stat () (fileInfoDirEntry , error ) {
if f .isDup {
return nil , errors .New (f .name + ": duplicate entries in zip file" )
}
if !f .isDir {
return headerFileInfo {&f .file .FileHeader }, nil
}
// Directories use the entry itself as FileInfo/DirEntry.
return f , nil
}
// The following methods make directory entries satisfy fs.FileInfo and
// fs.DirEntry with fixed values (mode 0555 dir, size 0).
func (f *fileListEntry ) Name () string { _ , elem , _ := split (f .name ); return elem }
func (f *fileListEntry ) Size () int64 { return 0 }
func (f *fileListEntry ) Mode () fs .FileMode { return fs .ModeDir | 0555 }
func (f *fileListEntry ) Type () fs .FileMode { return fs .ModeDir }
func (f *fileListEntry ) IsDir () bool { return true }
func (f *fileListEntry ) Sys () any { return nil }
// ModTime reports the backing file's modification time, or the zero time
// for synthesized directories with no backing file.
func (f *fileListEntry ) ModTime () time .Time {
if f .file == nil {
return time .Time {}
}
return f .file .FileHeader .Modified .UTC ()
}
func (f *fileListEntry ) Info () (fs .FileInfo , error ) { return f , nil }
func (f *fileListEntry ) String () string {
return fs .FormatDirEntry (f )
}
// toValidName maps an archive member name to a clean, slash-separated,
// relative path suitable for fs.FS lookups: backslashes become slashes,
// the result is path.Clean'ed, and any leading "/" or "../" components
// are stripped.
func toValidName(name string) string {
	cleaned := path.Clean(strings.ReplaceAll(name, `\`, `/`))
	cleaned = strings.TrimPrefix(cleaned, "/")
	for {
		rest, found := strings.CutPrefix(cleaned, "../")
		if !found {
			break
		}
		cleaned = rest
	}
	return cleaned
}
// initFileList builds, exactly once, the sorted index r.fileList used by
// the fs.FS methods: one entry per valid archive member plus synthesized
// entries for parent directories that have no explicit archive entry.
// Names occurring more than once are kept but marked isDup, so stat
// reports an error for them.
func (r *Reader ) initFileList () {
r .fileListOnce .Do (func () {
// files and knownDirs map cleaned names to fileList indices so
// later duplicates can mark the earlier entry; dirs collects every
// implicit parent directory encountered.
files := make (map [string ]int )
knownDirs := make (map [string ]int )
dirs := make (map [string ]bool )
for _ , file := range r .File {
// A trailing slash in the raw name marks a directory entry.
isDir := len (file .Name ) > 0 && file .Name [len (file .Name )-1 ] == '/'
name := toValidName (file .Name )
if name == "" {
continue
}
if idx , ok := files [name ]; ok {
r .fileList [idx ].isDup = true
continue
}
if idx , ok := knownDirs [name ]; ok {
r .fileList [idx ].isDup = true
continue
}
// Record every ancestor directory of this name.
for dir := path .Dir (name ); dir != "." ; dir = path .Dir (dir ) {
dirs [dir ] = true
}
idx := len (r .fileList )
entry := fileListEntry {
name : name ,
file : file ,
isDir : isDir ,
}
r .fileList = append (r .fileList , entry )
if isDir {
knownDirs [name ] = idx
} else {
files [name ] = idx
}
}
// Synthesize entries for implicit directories; a name that is both
// a file and an implied directory is marked as a duplicate.
for dir := range dirs {
if _ , ok := knownDirs [dir ]; !ok {
if idx , ok := files [dir ]; ok {
r .fileList [idx ].isDup = true
} else {
entry := fileListEntry {
name : dir ,
file : nil ,
isDir : true ,
}
r .fileList = append (r .fileList , entry )
}
}
}
// Sort by (dir, elem) so openLookup/openReadDir can binary search.
slices .SortFunc (r .fileList , func (a , b fileListEntry ) int {
return fileEntryCompare (a .name , b .name )
})
})
}
// fileEntryCompare orders index entries by parent directory first, then
// by final path element — the order required by the binary searches in
// openLookup and openReadDir.
func fileEntryCompare(x, y string) int {
	xdir, xelem, _ := split(x)
	ydir, yelem, _ := split(y)
	if c := strings.Compare(xdir, ydir); c != 0 {
		return c
	}
	return strings.Compare(xelem, yelem)
}
// Open opens the named file in the ZIP archive, implementing fs.FS.
// Paths follow fs.ValidPath semantics: unrooted, slash-separated, with
// no "." or ".." elements.
func (r *Reader) Open(name string) (fs.File, error) {
	r.initFileList()

	if !fs.ValidPath(name) {
		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
	}
	entry := r.openLookup(name)
	switch {
	case entry == nil:
		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
	case entry.isDir:
		return &openDir{e: entry, files: r.openReadDir(name)}, nil
	}
	rc, err := entry.file.Open()
	if err != nil {
		return nil, err
	}
	return rc.(fs.File), nil
}
// split divides a cleaned slash-separated name into its parent directory
// and final element, reporting whether the name had a trailing slash
// (i.e. denoted a directory). Names with no slash get dir ".".
func split(name string) (dir, elem string, isDir bool) {
	if strings.HasSuffix(name, "/") {
		isDir = true
		name = name[:len(name)-1]
	}
	if i := strings.LastIndexByte(name, '/'); i >= 0 {
		return name[:i], name[i+1:], isDir
	}
	return ".", name, isDir
}
// dotFile is the synthetic root-directory entry returned for Open(".").
var dotFile = &fileListEntry {name : "./" , isDir : true }
// openLookup finds the file-list entry for name ("." or an unrooted
// slash path), or nil if it is absent. r.fileList must already be built
// and sorted by initFileList.
func (r *Reader ) openLookup (name string ) *fileListEntry {
if name == "." {
return dotFile
}
dir , elem , _ := split (name )
files := r .fileList
// Binary search on (dir, elem); the comparator mirrors the ordering
// established by fileEntryCompare.
i , _ := slices .BinarySearchFunc (files , dir , func (a fileListEntry , dir string ) (ret int ) {
idir , ielem , _ := split (a .name )
if dir != idir {
return strings .Compare (idir , dir )
}
return strings .Compare (ielem , elem )
})
if i < len (files ) {
fname := files [i ].name
// Accept an exact match, or the stored directory form "name/".
if fname == name || len (fname ) == len (name )+1 && fname [len (name )] == '/' && fname [:len (name )] == name {
return &files [i ]
}
}
return nil
}
// openReadDir returns the contiguous sub-slice of the sorted r.fileList
// whose entries live directly inside dir. Two binary searches locate the
// half-open range [i, j): the first comparator always answers "target is
// before me" within dir (finding the lower bound), the second always
// answers "target is after me" (finding the upper bound).
func (r *Reader ) openReadDir (dir string ) []fileListEntry {
files := r .fileList
i , _ := slices .BinarySearchFunc (files , dir , func (a fileListEntry , dir string ) int {
idir , _ , _ := split (a .name )
if dir != idir {
return strings .Compare (idir , dir )
}
return +1
})
j , _ := slices .BinarySearchFunc (files , dir , func (a fileListEntry , dir string ) int {
jdir , _ , _ := split (a .name )
if dir != jdir {
return strings .Compare (jdir , dir )
}
return -1
})
return files [i :j ]
}
// openDir is the fs.ReadDirFile returned when opening a directory entry.
type openDir struct {
e *fileListEntry
// files is the directory's full listing; offset tracks how far
// successive ReadDir calls have consumed it.
files []fileListEntry
offset int
}
// Close implements fs.File; a directory handle holds no resources.
func (d *openDir) Close() error { return nil }

// Stat reports the directory's own metadata.
func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() }

// Read always fails: directories have no byte content.
func (d *openDir) Read(_ []byte) (int, error) {
	return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
}
// ReadDir implements fs.ReadDirFile. With count <= 0 it returns all
// remaining entries (and no error once exhausted); with count > 0 it
// returns at most count entries and io.EOF when the listing is done.
func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
	n := len(d.files) - d.offset
	if count > 0 && count < n {
		n = count
	}
	if n == 0 {
		if count <= 0 {
			return nil, nil
		}
		return nil, io.EOF
	}
	out := make([]fs.DirEntry, 0, n)
	for i := 0; i < n; i++ {
		info, err := d.files[d.offset+i].stat()
		if err != nil {
			// Do not advance offset on failure, matching fs semantics.
			return nil, err
		}
		out = append(out, info)
	}
	d.offset += n
	return out, nil
}
The pages are generated with Golds v0.7.0-preview (GOOS=linux, GOARCH=amd64).
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable via the QR code on the left) for the latest Golds news.