// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package flate implements the DEFLATE compressed data format, described in // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file // formats.
package flate import ( ) const ( maxCodeLen = 16 // max length of Huffman code // The next three numbers come from the RFC section 3.2.7, with the // additional proviso in section 3.2.5 which implies that distance codes // 30 and 31 should never occur in compressed data. maxNumLit = 286 maxNumDist = 30 numCodes = 19 // number of codes in Huffman meta-code ) // Initialize the fixedHuffmanDecoder only once upon first use. var fixedOnce sync.Once var fixedHuffmanDecoder huffmanDecoder // A CorruptInputError reports the presence of corrupt input at a given offset. type CorruptInputError int64 func ( CorruptInputError) () string { return "flate: corrupt input before offset " + strconv.FormatInt(int64(), 10) } // An InternalError reports an error in the flate code itself. type InternalError string func ( InternalError) () string { return "flate: internal error: " + string() } // A ReadError reports an error encountered while reading input. // // Deprecated: No longer returned. type ReadError struct { Offset int64 // byte offset where error occurred Err error // error returned by underlying Read } func ( *ReadError) () string { return "flate: read error at offset " + strconv.FormatInt(.Offset, 10) + ": " + .Err.Error() } // A WriteError reports an error encountered while writing output. // // Deprecated: No longer returned. type WriteError struct { Offset int64 // byte offset where error occurred Err error // error returned by underlying Write } func ( *WriteError) () string { return "flate: write error at offset " + strconv.FormatInt(.Offset, 10) + ": " + .Err.Error() } // Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] // to switch to a new underlying [Reader]. This permits reusing a ReadCloser // instead of allocating a new one. type Resetter interface { // Reset discards any buffered data and resets the Resetter as if it was // newly initialized with the given reader. Reset(r io.Reader, dict []byte) error } // The data structure for decoding Huffman tables is based on that of // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), // For codes smaller than the table width, there are multiple entries // (each combination of trailing bits has the same value). For codes // larger than the table width, the table contains a link to an overflow // table. The width of each entry in the link table is the maximum code // size minus the chunk width. // // Note that you can do a lookup in the table even without all bits // filled. Since the extra bits are zero, and the DEFLATE Huffman codes // have the property that shorter codes come before longer ones, the // bit length estimate in the result is a lower bound on the actual // number of bits. // // See the following: // https://github.com/madler/zlib/raw/master/doc/algorithm.txt // chunk & 15 is number of bits // chunk >> 4 is value, including table link const ( huffmanChunkBits = 9 huffmanNumChunks = 1 << huffmanChunkBits huffmanCountMask = 15 huffmanValueShift = 4 ) type huffmanDecoder struct { min int // the minimum code length chunks [huffmanNumChunks]uint32 // chunks as described above links [][]uint32 // overflow links linkMask uint32 // mask the width of the link table } // Initialize Huffman decoding tables from array of code lengths. // Following this function, h is guaranteed to be initialized into a complete // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a // degenerate case where the tree has only a single symbol with length 1. Empty // trees are permitted. func ( *huffmanDecoder) ( []int) bool { // Sanity enables additional runtime tests during Huffman // table construction. It's intended to be used during // development to supplement the currently ad-hoc unit tests. const = false if .min != 0 { * = huffmanDecoder{} } // Count number of codes of each length, // compute min and max length. var [maxCodeLen]int var , int for , := range { if == 0 { continue } if == 0 || < { = } if > { = } []++ } // Empty tree. The decompressor.huffSym function will fail later if the tree // is used. Technically, an empty tree is only valid for the HDIST tree and // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree // is guaranteed to fail since it will attempt to use the tree to decode the // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is // guaranteed to fail later since the compressed data section must be // composed of at least one symbol (the end-of-block marker). if == 0 { return true } := 0 var [maxCodeLen]int for := ; <= ; ++ { <<= 1 [] = += [] } // Check that the coding is complete (i.e., that we've // assigned all 2-to-the-max possible bit sequences). // Exception: To be compatible with zlib, we also need to // accept degenerate single-code codings. See also // TestDegenerateHuffmanCoding. if != 1<<uint() && !( == 1 && == 1) { return false } .min = if > huffmanChunkBits { := 1 << (uint() - huffmanChunkBits) .linkMask = uint32( - 1) // create link tables := [huffmanChunkBits+1] >> 1 .links = make([][]uint32, huffmanNumChunks-) for := uint(); < huffmanNumChunks; ++ { := int(bits.Reverse16(uint16())) >>= uint(16 - huffmanChunkBits) := - uint() if && .chunks[] != 0 { panic("impossible: overwriting existing chunk") } .chunks[] = uint32(<<huffmanValueShift | (huffmanChunkBits + 1)) .links[] = make([]uint32, ) } } for , := range { if == 0 { continue } := [] []++ := uint32(<<huffmanValueShift | ) := int(bits.Reverse16(uint16())) >>= uint(16 - ) if <= huffmanChunkBits { for := ; < len(.chunks); += 1 << uint() { // We should never need to overwrite // an existing chunk. Also, 0 is // never a valid chunk, because the // lower 4 "count" bits should be // between 1 and 15. if && .chunks[] != 0 { panic("impossible: overwriting existing chunk") } .chunks[] = } } else { := & (huffmanNumChunks - 1) if && .chunks[]&huffmanCountMask != huffmanChunkBits+1 { // Longer codes should have been // associated with a link table above. panic("impossible: not an indirect chunk") } := .chunks[] >> huffmanValueShift := .links[] >>= huffmanChunkBits for := ; < len(); += 1 << uint(-huffmanChunkBits) { if && [] != 0 { panic("impossible: overwriting existing chunk") } [] = } } } if { // Above we've sanity checked that we never overwrote // an existing entry. Here we additionally check that // we filled the tables completely. for , := range .chunks { if == 0 { // As an exception, in the degenerate // single-code case, we allow odd // chunks to be missing. if == 1 && %2 == 1 { continue } panic("impossible: missing chunk") } } for , := range .links { for , := range { if == 0 { panic("impossible: missing chunk") } } } } return true } // The actual read interface needed by [NewReader]. // If the passed in io.Reader does not also have ReadByte, // the [NewReader] will introduce its own buffering. type Reader interface { io.Reader io.ByteReader } // Decompress state. type decompressor struct { // Input source. r Reader rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader roffset int64 // Input bits, in top of b. b uint32 nb uint // Huffman decoders for literal/length, distance. h1, h2 huffmanDecoder // Length arrays used to define Huffman codes. bits *[maxNumLit + maxNumDist]int codebits *[numCodes]int // Output history, buffer. dict dictDecoder // Temporary buffer (avoids repeated allocation). buf [4]byte // Next step in the decompression, // and decompression state. step func(*decompressor) stepState int final bool err error toRead []byte hl, hd *huffmanDecoder copyLen int copyDist int } func ( *decompressor) () { for .nb < 1+2 { if .err = .moreBits(); .err != nil { return } } .final = .b&1 == 1 .b >>= 1 := .b & 3 .b >>= 2 .nb -= 1 + 2 switch { case 0: .dataBlock() case 1: // compressed, fixed Huffman tables .hl = &fixedHuffmanDecoder .hd = nil .huffmanBlock() case 2: // compressed, dynamic Huffman tables if .err = .readHuffman(); .err != nil { break } .hl = &.h1 .hd = &.h2 .huffmanBlock() default: // 3 is reserved. .err = CorruptInputError(.roffset) } } func ( *decompressor) ( []byte) (int, error) { for { if len(.toRead) > 0 { := copy(, .toRead) .toRead = .toRead[:] if len(.toRead) == 0 { return , .err } return , nil } if .err != nil { return 0, .err } .step() if .err != nil && len(.toRead) == 0 { .toRead = .dict.readFlush() // Flush what's left in case of error } } } func ( *decompressor) () error { if .err == io.EOF { return nil } return .err } // RFC 1951 section 3.2.7. // Compression with dynamic Huffman codes var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} func ( *decompressor) () error { // HLIT[5], HDIST[5], HCLEN[4]. for .nb < 5+5+4 { if := .moreBits(); != nil { return } } := int(.b&0x1F) + 257 if > maxNumLit { return CorruptInputError(.roffset) } .b >>= 5 := int(.b&0x1F) + 1 if > maxNumDist { return CorruptInputError(.roffset) } .b >>= 5 := int(.b&0xF) + 4 // numCodes is 19, so nclen is always valid. .b >>= 4 .nb -= 5 + 5 + 4 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. for := 0; < ; ++ { for .nb < 3 { if := .moreBits(); != nil { return } } .codebits[codeOrder[]] = int(.b & 0x7) .b >>= 3 .nb -= 3 } for := ; < len(codeOrder); ++ { .codebits[codeOrder[]] = 0 } if !.h1.init(.codebits[0:]) { return CorruptInputError(.roffset) } // HLIT + 257 code lengths, HDIST + 1 code lengths, // using the code length Huffman code. for , := 0, +; < ; { , := .huffSym(&.h1) if != nil { return } if < 16 { // Actual length. .bits[] = ++ continue } // Repeat previous length or zero. var int var uint var int switch { default: return InternalError("unexpected length code") case 16: = 3 = 2 if == 0 { return CorruptInputError(.roffset) } = .bits[-1] case 17: = 3 = 3 = 0 case 18: = 11 = 7 = 0 } for .nb < { if := .moreBits(); != nil { return } } += int(.b & uint32(1<<-1)) .b >>= .nb -= if + > { return CorruptInputError(.roffset) } for := 0; < ; ++ { .bits[] = ++ } } if !.h1.init(.bits[0:]) || !.h2.init(.bits[:+]) { return CorruptInputError(.roffset) } // As an optimization, we can initialize the min bits to read at a time // for the HLIT tree to the length of the EOB marker since we know that // every block must terminate with one. This preserves the property that // we never read any extra bytes after the end of the DEFLATE stream. if .h1.min < .bits[endBlockMarker] { .h1.min = .bits[endBlockMarker] } return nil } // Decode a single Huffman block from f. // hl and hd are the Huffman states for the lit/length values // and the distance values, respectively. If hd == nil, using the // fixed distance encoding associated with fixed Huffman blocks. func ( *decompressor) () { const ( = iota // Zero value must be stateInit ) switch .stepState { case : goto case : goto } : // Read literal and/or (length, distance) according to RFC section 3.2.3. { , := .huffSym(.hl) if != nil { .err = return } var uint // number of bits extra var int switch { case < 256: .dict.writeByte(byte()) if .dict.availWrite() == 0 { .toRead = .dict.readFlush() .step = (*decompressor). .stepState = return } goto case == 256: .finishBlock() return // otherwise, reference to older data case < 265: = - (257 - 3) = 0 case < 269: = *2 - (265*2 - 11) = 1 case < 273: = *4 - (269*4 - 19) = 2 case < 277: = *8 - (273*8 - 35) = 3 case < 281: = *16 - (277*16 - 67) = 4 case < 285: = *32 - (281*32 - 131) = 5 case < maxNumLit: = 258 = 0 default: .err = CorruptInputError(.roffset) return } if > 0 { for .nb < { if = .moreBits(); != nil { .err = return } } += int(.b & uint32(1<<-1)) .b >>= .nb -= } var int if .hd == nil { for .nb < 5 { if = .moreBits(); != nil { .err = return } } = int(bits.Reverse8(uint8(.b & 0x1F << 3))) .b >>= 5 .nb -= 5 } else { if , = .huffSym(.hd); != nil { .err = return } } switch { case < 4: ++ case < maxNumDist: := uint(-2) >> 1 // have 1 bit in bottom of dist, need nb more. := ( & 1) << for .nb < { if = .moreBits(); != nil { .err = return } } |= int(.b & uint32(1<<-1)) .b >>= .nb -= = 1<<(+1) + 1 + default: .err = CorruptInputError(.roffset) return } // No check on length; encoding can be prescient. if > .dict.histSize() { .err = CorruptInputError(.roffset) return } .copyLen, .copyDist = , goto } : // Perform a backwards copy according to RFC section 3.2.3. { := .dict.tryWriteCopy(.copyDist, .copyLen) if == 0 { = .dict.writeCopy(.copyDist, .copyLen) } .copyLen -= if .dict.availWrite() == 0 || .copyLen > 0 { .toRead = .dict.readFlush() .step = (*decompressor). // We need to continue this work .stepState = return } goto } } // Copy a single uncompressed data block from input to output. func ( *decompressor) () { // Uncompressed. // Discard current half-byte. .nb = 0 .b = 0 // Length then ones-complement of length. , := io.ReadFull(.r, .buf[0:4]) .roffset += int64() if != nil { .err = noEOF() return } := int(.buf[0]) | int(.buf[1])<<8 := int(.buf[2]) | int(.buf[3])<<8 if uint16() != uint16(^) { .err = CorruptInputError(.roffset) return } if == 0 { .toRead = .dict.readFlush() .finishBlock() return } .copyLen = .copyData() } // copyData copies f.copyLen bytes from the underlying reader into f.hist. // It pauses for reads when f.hist is full. func ( *decompressor) () { := .dict.writeSlice() if len() > .copyLen { = [:.copyLen] } , := io.ReadFull(.r, ) .roffset += int64() .copyLen -= .dict.writeMark() if != nil { .err = noEOF() return } if .dict.availWrite() == 0 || .copyLen > 0 { .toRead = .dict.readFlush() .step = (*decompressor). return } .finishBlock() } func ( *decompressor) () { if .final { if .dict.availRead() > 0 { .toRead = .dict.readFlush() } .err = io.EOF } .step = (*decompressor).nextBlock } // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. func noEOF( error) error { if == io.EOF { return io.ErrUnexpectedEOF } return } func ( *decompressor) () error { , := .r.ReadByte() if != nil { return noEOF() } .roffset++ .b |= uint32() << .nb .nb += 8 return nil } // Read the next Huffman-encoded symbol from f according to h. func ( *decompressor) ( *huffmanDecoder) (int, error) { // Since a huffmanDecoder can be empty or be composed of a degenerate tree // with single element, huffSym must error on these two edge cases. In both // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. := uint(.min) // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. , := .nb, .b for { for < { , := .r.ReadByte() if != nil { .b = .nb = return 0, noEOF() } .roffset++ |= uint32() << ( & 31) += 8 } := .chunks[&(huffmanNumChunks-1)] = uint( & huffmanCountMask) if > huffmanChunkBits { = .links[>>huffmanValueShift][(>>huffmanChunkBits)&.linkMask] = uint( & huffmanCountMask) } if <= { if == 0 { .b = .nb = .err = CorruptInputError(.roffset) return 0, .err } .b = >> ( & 31) .nb = - return int( >> huffmanValueShift), nil } } } func ( *decompressor) ( io.Reader) { if , := .(Reader); { .rBuf = nil .r = return } // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. if .rBuf != nil { .rBuf.Reset() } else { // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. .rBuf = bufio.NewReader() } .r = .rBuf } func fixedHuffmanDecoderInit() { fixedOnce.Do(func() { // These come from the RFC section 3.2.6. var [288]int for := 0; < 144; ++ { [] = 8 } for := 144; < 256; ++ { [] = 9 } for := 256; < 280; ++ { [] = 7 } for := 280; < 288; ++ { [] = 8 } fixedHuffmanDecoder.init([:]) }) } func ( *decompressor) ( io.Reader, []byte) error { * = decompressor{ rBuf: .rBuf, bits: .bits, codebits: .codebits, dict: .dict, step: (*decompressor).nextBlock, } .makeReader() .dict.init(maxMatchOffset, ) return nil } // NewReader returns a new ReadCloser that can be used // to read the uncompressed version of r. // If r does not also implement [io.ByteReader], // the decompressor may read more data than necessary from r. // The reader returns [io.EOF] after the final block in the DEFLATE stream has // been encountered. Any trailing data after the final block is ignored. // // The [io.ReadCloser] returned by NewReader also implements [Resetter]. func ( io.Reader) io.ReadCloser { fixedHuffmanDecoderInit() var decompressor .makeReader() .bits = new([maxNumLit + maxNumDist]int) .codebits = new([numCodes]int) .step = (*decompressor).nextBlock .dict.init(maxMatchOffset, nil) return & } // NewReaderDict is like [NewReader] but initializes the reader // with a preset dictionary. The returned [Reader] behaves as if // the uncompressed data stream started with the given dictionary, // which has already been read. NewReaderDict is typically used // to read data compressed by NewWriterDict. // // The ReadCloser returned by NewReaderDict also implements [Resetter]. func ( io.Reader, []byte) io.ReadCloser { fixedHuffmanDecoderInit() var decompressor .makeReader() .bits = new([maxNumLit + maxNumDist]int) .codebits = new([numCodes]int) .step = (*decompressor).nextBlock .dict.init(maxMatchOffset, ) return & }