// Source file: gunzip.go
// Belonging package: compress/gzip

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip
import (
	"bufio"
	"compress/flate"
	"encoding/binary"
	"errors"
	"hash/crc32"
	"io"
	"time"
)
// Fixed values expected at the start of every gzip member header:
// the two identification bytes and the compression method.
const (
	gzipID1     = 0x1f
	gzipID2     = 0x8b
	gzipDeflate = 8
)

// Bit masks tested against the header's flag byte to detect optional fields.
const (
	flagText = 1 << iota
	flagHdrCrc
	flagExtra
	flagName
	flagComment
)
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
var ErrChecksum = errors.New("gzip: invalid checksum")

// ErrHeader is returned when reading GZIP data that has an invalid header.
var ErrHeader = errors.New("gzip: invalid header")

// le is shorthand for the little-endian byte order used to decode the
// multi-byte integers in the gzip header and trailer.
var le = binary.LittleEndian
// noEOF converts io.EOF to io.ErrUnexpectedEOF.
// Any other error, including nil, is returned unchanged.
// It is used when more input is required, so running out of data
// is a truncation rather than a clean end of stream.
func noEOF(err error) error {
	if err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}
// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the [Writer] and [Reader] structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
	// Comment is the comment.
	Comment string

	// Extra is the "extra data".
	Extra []byte

	// ModTime is the modification time.
	ModTime time.Time

	// Name is the file name.
	Name string

	// OS is the operating system type.
	OS byte
}
// A Reader is an [io.Reader] that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return an [ErrChecksum] when [Reader.Read]
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by [Reader.Read] as tentative until they receive the [io.EOF]
// marking the end of the data.
type Reader struct {
	// Header is valid after NewReader or Reader.Reset.
	Header

	r            flate.Reader  // source of the compressed bytes
	decompressor io.ReadCloser // decompressor reading from r; kept across Reset
	digest       uint32        // CRC-32, IEEE polynomial (section 8)
	size         uint32        // Uncompressed size (section 2.3.1)
	buf          [512]byte     // scratch space for header, string and trailer bytes
	err          error         // sticky error returned by subsequent Read calls
	multistream  bool          // whether Read continues into the next gzip member
}
// NewReader creates a new [Reader] reading the given reader.
// If r does not also implement [io.ByteReader],
// the decompressor may read more data than necessary from r.
//
// It is the caller's responsibility to call Close on the [Reader] when done.
//
// The [Reader.Header] fields will be valid in the [Reader] returned.
func ( io.Reader) (*Reader, error) {
:= new(Reader)
if := .Reset(); != nil {
return nil,
}
return , nil
}
// Reset discards the [Reader] z's state and makes it equivalent to the
// result of its original state from [NewReader], but reading from r instead.
// This permits reusing a [Reader] rather than allocating a new one.
func ( *Reader) ( io.Reader) error {
* = Reader{
decompressor: .decompressor,
multistream: true,
}
if , := .(flate.Reader); {
.r =
} else {
.r = bufio.NewReader()
}
.Header, .err = .readHeader()
return .err
}
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the [Reader] expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the [Reader] reaches the end of the data stream,
// [Reader.Read] returns [io.EOF]. The underlying reader must implement [io.ByteReader]
// in order to be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
// If there is no next stream, z.Reset(r) will return [io.EOF].
func ( *Reader) ( bool) {
.multistream =
}
// readString reads a NUL-terminated string from z.r.
// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
// will output a string encoded using UTF-8.
// This method always updates z.digest with the data read.
func ( *Reader) () (string, error) {
var error
:= false
for := 0; ; ++ {
if >= len(.buf) {
return "", ErrHeader
}
.buf[], = .r.ReadByte()
if != nil {
return "",
}
if .buf[] > 0x7f {
= true
}
if .buf[] == 0 {
// Digest covers the NUL terminator.
.digest = crc32.Update(.digest, crc32.IEEETable, .buf[:+1])
// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
if {
:= make([]rune, 0, )
for , := range .buf[:] {
= append(, rune())
}
return string(), nil
}
return string(.buf[:]), nil
}
}
}
// readHeader reads the GZIP header according to section 2.3.1.
// This method does not set z.err.
func ( *Reader) () ( Header, error) {
if _, = io.ReadFull(.r, .buf[:10]); != nil {
// RFC 1952, section 2.2, says the following:
// A gzip file consists of a series of "members" (compressed data sets).
//
// Other than this, the specification does not clarify whether a
// "series" is defined as "one or more" or "zero or more". To err on the
// side of caution, Go interprets this to mean "zero or more".
// Thus, it is okay to return io.EOF here.
return ,
}
if .buf[0] != gzipID1 || .buf[1] != gzipID2 || .buf[2] != gzipDeflate {
return , ErrHeader
}
:= .buf[3]
if := int64(le.Uint32(.buf[4:8])); > 0 {
// Section 2.3.1, the zero value for MTIME means that the
// modified time is not set.
.ModTime = time.Unix(, 0)
}
// z.buf[8] is XFL and is currently ignored.
.OS = .buf[9]
.digest = crc32.ChecksumIEEE(.buf[:10])
if &flagExtra != 0 {
if _, = io.ReadFull(.r, .buf[:2]); != nil {
return , noEOF()
}
.digest = crc32.Update(.digest, crc32.IEEETable, .buf[:2])
:= make([]byte, le.Uint16(.buf[:2]))
if _, = io.ReadFull(.r, ); != nil {
return , noEOF()
}
.digest = crc32.Update(.digest, crc32.IEEETable, )
.Extra =
}
var string
if &flagName != 0 {
if , = .readString(); != nil {
return , noEOF()
}
.Name =
}
if &flagComment != 0 {
if , = .readString(); != nil {
return , noEOF()
}
.Comment =
}
if &flagHdrCrc != 0 {
if _, = io.ReadFull(.r, .buf[:2]); != nil {
return , noEOF()
}
:= le.Uint16(.buf[:2])
if != uint16(.digest) {
return , ErrHeader
}
}
.digest = 0
if .decompressor == nil {
.decompressor = flate.NewReader(.r)
} else {
.decompressor.(flate.Resetter).Reset(.r, nil)
}
return , nil
}
// Read implements [io.Reader], reading uncompressed bytes from its underlying [Reader].
func ( *Reader) ( []byte) ( int, error) {
if .err != nil {
return 0, .err
}
for == 0 {
, .err = .decompressor.Read()
.digest = crc32.Update(.digest, crc32.IEEETable, [:])
.size += uint32()
if .err != io.EOF {
// In the normal case we return here.
return , .err
}
// Finished file; check checksum and size.
if , := io.ReadFull(.r, .buf[:8]); != nil {
.err = noEOF()
return , .err
}
:= le.Uint32(.buf[:4])
:= le.Uint32(.buf[4:8])
if != .digest || != .size {
.err = ErrChecksum
return , .err
}
.digest, .size = 0, 0
// File is ok; check if there is another.
if !.multistream {
return , io.EOF
}
.err = nil // Remove io.EOF
if _, .err = .readHeader(); .err != nil {
return , .err
}
}
return , nil
}
// Close closes the [Reader]. It does not close the underlying [io.Reader].
// In order for the GZIP checksum to be verified, the reader must be
// fully consumed until the [io.EOF].
func ( *Reader) () error { return .decompressor.Close() }
// The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.