package zstd
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
// fuzzing enables extra input validation when true: readFrameHeader then
// rejects frames whose window log exceeds 31 or whose window size exceeds
// 1<<27. It is false by default.
// NOTE(review): presumably flipped by fuzz tests; no assignment is visible
// in this file.
var fuzzing bool
// Reader decodes a zstd stream read from an underlying io.Reader.
// Its Read and ReadByte methods have the signatures of io.Reader and
// io.ByteReader. Create one with NewReader; reuse it with Reset.
type Reader struct {
// r is the underlying input; it is replaced by Reset.
r io .Reader
// sawFrameHeader reports whether we are inside a frame: it is set once a
// frame header has been parsed and cleared after the frame's last block.
sawFrameHeader bool
// hasChecksum reports whether the current frame's header descriptor has
// the checksum bit set, in which case a 4-byte checksum follows the last
// block and is verified.
hasChecksum bool
// readOneFrame is set once any frame (regular or skippable) has been
// seen. Until then, EOF at a frame boundary is reported as
// io.ErrUnexpectedEOF instead of io.EOF.
readOneFrame bool
// frameSizeUnknown is true when the frame header carries no content size.
frameSizeUnknown bool
// remainingFrameSize is the number of decoded bytes the current frame
// must still produce; meaningful only when frameSizeUnknown is false.
remainingFrameSize uint64
// blockOffset is the running position in the input, used as the base
// offset in error messages.
blockOffset int64
// buffer holds the decoded bytes of the current block.
buffer []byte
// off is the index in buffer of the next byte to hand to the caller.
off int
// repeatedOffset1..3 are reset to 1, 4 and 8 at the start of each frame.
// NOTE(review): presumably the repeat-offset history used while decoding
// sequences; their use is outside this section.
repeatedOffset1 uint32
repeatedOffset2 uint32
repeatedOffset3 uint32
// huffmanTable and huffmanTableBits: huffmanTableBits is reset to 0 at
// the start of each frame.
// NOTE(review): presumably the literals Huffman decoding table and its
// bit width; their use is outside this section.
huffmanTable []uint16
huffmanTableBits int
// window is reset with the frame's window size at each frame header and
// receives the contents of every block except the last one of a frame.
window window
// compressedBuf and literals are not used in this section.
// NOTE(review): presumably scratch storage for compressedBlock.
compressedBuf []byte
literals []byte
// seqTables entries are set to nil at the start of each frame.
// seqTableBits and seqTableBuffers are not used in this section.
// NOTE(review): presumably the three FSE sequence decoding tables, their
// bit widths, and reusable backing storage.
seqTables [3 ][]fseBaselineEntry
seqTableBits [3 ]uint8
seqTableBuffers [3 ][]fseBaselineEntry
// scratch is a small fixed buffer used to read frame and block headers
// and the trailing checksum.
scratch [16 ]byte
// fseScratch is not used in this section.
// NOTE(review): presumably scratch storage for building FSE tables.
fseScratch []fseEntry
// checksum accumulates a hash of the decoded output of the current frame
// when hasChecksum is set.
checksum xxhash64
}
// NewReader returns a new Reader that reads its compressed data from input.
// The Reader is initialized by calling Reset(input) on a zero value.
func NewReader(input io.Reader) *Reader {
	r := &Reader{}
	r.Reset(input)
	return r
}
// Reset discards the current stream state and makes r read from input.
// It allows a Reader to be reused instead of allocating a new one.
//
// Only the fields assigned here are cleared; every other field keeps its
// previous value. The output buffer is truncated rather than released, so
// its backing array is reused by later blocks.
func (r *Reader) Reset(input io.Reader) {
	r.r = input

	// Forget the frame-level state of the previous stream.
	r.sawFrameHeader, r.hasChecksum = false, false
	r.readOneFrame, r.frameSizeUnknown = false, false
	r.remainingFrameSize, r.blockOffset = 0, 0

	// Drop pending output but keep the allocated capacity.
	r.off = 0
	r.buffer = r.buffer[:0]
}
// Read copies decoded bytes into p and returns the number of bytes copied.
// If the current block's buffer is exhausted it first decodes more input
// via refillIfNeeded; any error from that step is returned with a zero
// count. A single call returns bytes from at most one buffered block.
func (r *Reader) Read(p []byte) (int, error) {
	err := r.refillIfNeeded()
	if err != nil {
		return 0, err
	}

	pending := r.buffer[r.off:]
	copied := copy(p, pending)
	r.off += copied
	return copied, nil
}
// ReadByte returns the next decoded byte. If the current block's buffer is
// exhausted it first decodes more input via refillIfNeeded; any error from
// that step is returned together with a zero byte.
func (r *Reader) ReadByte() (byte, error) {
	err := r.refillIfNeeded()
	if err != nil {
		return 0, err
	}

	pos := r.off
	b := r.buffer[pos]
	r.off = pos + 1
	return b, nil
}
// refillIfNeeded makes sure at least one unread byte is available in
// r.buffer. While the buffer is exhausted it calls refill and rewinds r.off
// to the start of the new data; it loops because a refill may yield an
// empty buffer. The first error from refill is returned as is, leaving
// r.off untouched.
func (r *Reader) refillIfNeeded() error {
	for {
		if r.off < len(r.buffer) {
			return nil
		}
		if err := r.refill(); err != nil {
			return err
		}
		r.off = 0
	}
}
// refill decodes the next block into r.buffer. When no frame is currently
// open (sawFrameHeader is false) it parses a frame header first; a header
// error is returned without attempting to read a block.
func (r *Reader) refill() error {
	if r.sawFrameHeader {
		return r.readBlock()
	}
	if err := r.readFrameHeader(); err != nil {
		return err
	}
	return r.readBlock()
}
// readFrameHeader reads the header of the next regular frame and prepares
// the Reader to decode that frame's blocks. Skippable frames that precede
// it are consumed and ignored.
//
// On success it sets sawFrameHeader and readOneFrame, records the frame's
// checksum flag and content size, advances blockOffset past the header, and
// resets the per-frame decoding state. Errors are wrapped with the input
// offset at which they were detected, except for a clean io.EOF at a frame
// boundary after at least one frame has been read, which is returned as is.
func (r *Reader ) readFrameHeader () error {
// retry is re-entered after each skippable frame to look for the next magic
// number.
retry :
// relativeOffset counts header bytes consumed since blockOffset.
relativeOffset := 0
// Read the 4-byte magic number.
if _ , err := io .ReadFull (r .r , r .scratch [:4 ]); err != nil {
// The stream must contain at least one frame, so EOF before any frame has
// been seen is an error rather than a normal end of stream.
if err == io .EOF && !r .readOneFrame {
err = io .ErrUnexpectedEOF
}
return r .wrapError (relativeOffset , err )
}
// 0xfd2fb528 is the magic number of a regular frame.
if magic := binary .LittleEndian .Uint32 (r .scratch [:4 ]); magic != 0xfd2fb528 {
// Magic numbers in this 16-value range introduce a skippable frame.
if magic >= 0x184d2a50 && magic <= 0x184d2a5f {
// Account for the magic number before skipFrame adds the rest of the frame.
r .blockOffset += int64 (relativeOffset ) + 4
if err := r .skipFrame (); err != nil {
return err
}
// A skippable frame counts as a frame for end-of-stream purposes.
r .readOneFrame = true
goto retry
}
return r .makeError (relativeOffset , "invalid magic number" )
}
relativeOffset += 4
// Read the one-byte frame header descriptor.
if _ , err := io .ReadFull (r .r , r .scratch [:1 ]); err != nil {
return r .wrapNonEOFError (relativeOffset , err )
}
descriptor := r .scratch [0 ]
// Bit 5: single-segment flag.
singleSegment := descriptor &(1 <<5 ) != 0
// Bits 6-7 select a frame content size field of 1, 2, 4 or 8 bytes. The
// 1-byte form exists only for single-segment frames; otherwise the field is
// absent.
fcsFieldSize := 1 << (descriptor >> 6 )
if fcsFieldSize == 1 && !singleSegment {
fcsFieldSize = 0
}
// The one-byte window descriptor is present only when the frame is not
// single-segment.
var windowDescriptorSize int
if singleSegment {
windowDescriptorSize = 0
} else {
windowDescriptorSize = 1
}
// Bit 3 is reserved and must be zero.
if descriptor &(1 <<3 ) != 0 {
return r .makeError (relativeOffset , "reserved bit set in frame header descriptor" )
}
// Bit 2: a content checksum follows the last block.
r .hasChecksum = descriptor &(1 <<2 ) != 0
if r .hasChecksum {
r .checksum .reset ()
}
// Bits 0-1 select a dictionary ID field of 0, 1, 2 or 4 bytes.
dictionaryIdSize := 0
if dictIdFlag := descriptor & 3 ; dictIdFlag != 0 {
dictionaryIdSize = 1 << (dictIdFlag - 1 )
}
relativeOffset ++
// Read the remaining header fields in one go. Their combined size is at
// most 1+4+8 = 13 bytes, which fits in the 16-byte scratch buffer.
headerSize := windowDescriptorSize + dictionaryIdSize + fcsFieldSize
if _ , err := io .ReadFull (r .r , r .scratch [:headerSize ]); err != nil {
return r .wrapNonEOFError (relativeOffset , err )
}
// Work out how much decoded data must be retained in the window.
var windowSize uint64
if !singleSegment {
// Window descriptor: the top 5 bits are an exponent, the low 3 bits a
// mantissa. The size is 2^(10+exponent) plus mantissa eighths of that.
windowDescriptor := r .scratch [0 ]
exponent := uint64 (windowDescriptor >> 3 )
mantissa := uint64 (windowDescriptor & 7 )
windowLog := exponent + 10
windowBase := uint64 (1 ) << windowLog
windowAdd := (windowBase / 8 ) * mantissa
windowSize = windowBase + windowAdd
// Extra limits on the window size, enforced only when fuzzing is true.
if fuzzing && (windowLog > 31 || windowSize > 1 <<27 ) {
return r .makeError (relativeOffset , "windowSize too large" )
}
}
// Only an all-zero dictionary ID is accepted.
if dictionaryIdSize != 0 {
dictionaryId := r .scratch [windowDescriptorSize : windowDescriptorSize +dictionaryIdSize ]
for _ , b := range dictionaryId {
if b != 0 {
return r .makeError (relativeOffset , "dictionaries are not supported" )
}
}
}
// Decode the frame content size, if present.
r .frameSizeUnknown = false
r .remainingFrameSize = 0
fb := r .scratch [windowDescriptorSize +dictionaryIdSize :]
switch fcsFieldSize {
case 0 :
r .frameSizeUnknown = true
case 1 :
r .remainingFrameSize = uint64 (fb [0 ])
case 2 :
// The 2-byte form stores the size minus 256.
r .remainingFrameSize = 256 + uint64 (binary .LittleEndian .Uint16 (fb ))
case 4 :
r .remainingFrameSize = uint64 (binary .LittleEndian .Uint32 (fb ))
case 8 :
r .remainingFrameSize = binary .LittleEndian .Uint64 (fb )
default :
// fcsFieldSize is 0 or 1<<n for n in 0..3, so no other value can occur.
panic ("unreachable" )
}
// A single-segment frame has no window descriptor; the frame content size
// is used as the window size instead.
if singleSegment {
windowSize = r .remainingFrameSize
}
// Cap the retained window at 8 MiB.
const maxWindowSize = 8 << 20
if windowSize > maxWindowSize {
windowSize = maxWindowSize
}
relativeOffset += headerSize
r .sawFrameHeader = true
r .readOneFrame = true
r .blockOffset += int64 (relativeOffset )
// Reset the per-frame decoding state before the first block is read.
r .repeatedOffset1 = 1
r .repeatedOffset2 = 4
r .repeatedOffset3 = 8
r .huffmanTableBits = 0
r .window .reset (int (windowSize ))
r .seqTables [0 ] = nil
r .seqTables [1 ] = nil
r .seqTables [2 ] = nil
return nil
}
// skipFrame consumes the remainder of a skippable frame. The caller has
// already read the 4-byte magic number and added it to r.blockOffset.
//
// The frame consists of a 4-byte little-endian size followed by that many
// bytes of user data, which are discarded. If the input implements
// io.Seeker the data is skipped by seeking; otherwise it is read and thrown
// away in chunks of at most 1 MiB. On success r.blockOffset has been
// advanced past the whole frame. A truncated frame is reported as
// io.ErrUnexpectedEOF wrapped with the input offset.
func (r *Reader) skipFrame() error {
	relativeOffset := 0

	// Read the frame size.
	if _, err := io.ReadFull(r.r, r.scratch[:4]); err != nil {
		return r.wrapNonEOFError(relativeOffset, err)
	}
	relativeOffset += 4

	size := binary.LittleEndian.Uint32(r.scratch[:4])
	if size == 0 {
		r.blockOffset += int64(relativeOffset)
		return nil
	}

	if seeker, ok := r.r.(io.Seeker); ok {
		r.blockOffset += int64(relativeOffset)

		// Implementations of Seeker do not always detect invalid offsets,
		// so validate the target against the end of the input first.
		prev, err := seeker.Seek(0, io.SeekCurrent)
		if err != nil {
			return r.wrapError(0, err)
		}
		end, err := seeker.Seek(0, io.SeekEnd)
		if err != nil {
			return r.wrapError(0, err)
		}
		if prev > end-int64(size) {
			// The frame claims more data than the input holds. The input
			// is left positioned at its end.
			r.blockOffset += end - prev
			return r.makeEOFError(0)
		}

		// The target offset is valid, so seek to it.
		if _, err := seeker.Seek(prev+int64(size), io.SeekStart); err != nil {
			return r.wrapError(0, err)
		}
		r.blockOffset += int64(size)
		return nil
	}

	// Not seekable: read and discard the data through a bounded buffer.
	const chunk = 1 << 20 // 1 MiB
	bufLen := size
	if bufLen > chunk {
		bufLen = chunk
	}
	skip := make([]byte, bufLen)
	for size > 0 {
		n := size
		if n > chunk {
			n = chunk
		}
		// Read exactly n bytes. Reading the whole buffer for a final
		// partial chunk would consume bytes belonging to the next frame.
		if _, err := io.ReadFull(r.r, skip[:n]); err != nil {
			return r.wrapNonEOFError(relativeOffset, err)
		}
		relativeOffset += int(n)
		size -= n
	}

	r.blockOffset += int64(relativeOffset)
	return nil
}
// readBlock reads one block of the current frame and leaves its decoded
// contents in r.buffer.
//
// It parses the 3-byte block header, decodes the block according to its
// type, checks the decoded length against the frame content size when that
// is known, feeds the output to the checksum when the frame has one, and
// saves the output in the window unless this is the frame's last block.
// After the last block it verifies the trailing checksum, if any, and
// clears sawFrameHeader so that the next refill reads a new frame header.
func (r *Reader ) readBlock () error {
// relativeOffset counts block bytes consumed since blockOffset.
relativeOffset := 0
// Read the 3-byte little-endian block header.
if _ , err := io .ReadFull (r .r , r .scratch [:3 ]); err != nil {
return r .wrapNonEOFError (relativeOffset , err )
}
relativeOffset += 3
header := uint32 (r .scratch [0 ]) | (uint32 (r .scratch [1 ]) << 8 ) | (uint32 (r .scratch [2 ]) << 16 )
// Bit 0: last-block flag. Bits 1-2: block type. Bits 3-23: block size.
lastBlock := header &1 != 0
blockType := (header >> 1 ) & 3
blockSize := int (header >> 3 )
// A block may not exceed 128 KiB, nor the window size when one is recorded
// (r.window.size > 0).
if blockSize > 128 <<10 || (r .window .size > 0 && blockSize > r .window .size ) {
return r .makeError (relativeOffset , "block size too large" )
}
switch blockType {
case 0 :
// Type 0: blockSize bytes are copied verbatim from the input.
r .setBufferSize (blockSize )
if _ , err := io .ReadFull (r .r , r .buffer ); err != nil {
return r .wrapNonEOFError (relativeOffset , err )
}
relativeOffset += blockSize
r .blockOffset += int64 (relativeOffset )
case 1 :
// Type 1: a single input byte is repeated blockSize times.
r .setBufferSize (blockSize )
if _ , err := io .ReadFull (r .r , r .scratch [:1 ]); err != nil {
return r .wrapNonEOFError (relativeOffset , err )
}
relativeOffset ++
v := r .scratch [0 ]
for i := range r .buffer {
r .buffer [i ] = v
}
r .blockOffset += int64 (relativeOffset )
case 2 :
// Type 2: compressed block; blockSize is the compressed size. blockOffset
// is advanced past the header first so that compressedBlock reports
// offsets relative to the start of the block contents.
r .blockOffset += int64 (relativeOffset )
if err := r .compressedBlock (blockSize ); err != nil {
return err
}
r .blockOffset += int64 (blockSize )
case 3 :
// Type 3 is not a valid block type.
return r .makeError (relativeOffset , "invalid block type" )
}
// Enforce the declared frame content size, when there is one.
if !r .frameSizeUnknown {
if uint64 (len (r .buffer )) > r .remainingFrameSize {
return r .makeError (relativeOffset , "too many uncompressed bytes in frame" )
}
r .remainingFrameSize -= uint64 (len (r .buffer ))
}
if r .hasChecksum {
r .checksum .update (r .buffer )
}
if !lastBlock {
// Later blocks may refer back to this data, so keep it in the window.
r .window .save (r .buffer )
} else {
// The frame ends here; it must have produced exactly the declared size.
if !r .frameSizeUnknown && r .remainingFrameSize != 0 {
return r .makeError (relativeOffset , "not enough uncompressed bytes for frame" )
}
// Verify the 4-byte little-endian checksum that follows the last block.
// Only the low 32 bits of the computed digest are compared.
if r .hasChecksum {
if _ , err := io .ReadFull (r .r , r .scratch [:4 ]); err != nil {
return r .wrapNonEOFError (0 , err )
}
inputChecksum := binary .LittleEndian .Uint32 (r .scratch [:4 ])
dataChecksum := uint32 (r .checksum .digest ())
if inputChecksum != dataChecksum {
return r .wrapError (0 , fmt .Errorf ("invalid checksum: got %#x want %#x" , dataChecksum , inputChecksum ))
}
r .blockOffset += 4
}
// The next refill must start with a new frame header.
r .sawFrameHeader = false
}
return nil
}
// setBufferSize sets the length of r.buffer to size, growing the backing
// array when its capacity is too small. Existing contents are preserved
// across growth; bytes exposed by extending the length are whatever the
// backing array already held.
func (r *Reader) setBufferSize(size int) {
	if have := cap(r.buffer); have < size {
		// Extend from full capacity so append allocates only the shortfall.
		r.buffer = append(r.buffer[:have], make([]byte, size-have)...)
	}
	r.buffer = r.buffer[:size]
}
// zstdError is the error type produced by the Reader. It pairs an
// underlying error with the input offset at which it was detected.
type zstdError struct {
	offset int64 // position in the compressed input
	err    error // underlying cause
}

// Error formats the error as the input offset followed by the cause.
func (e *zstdError) Error() string {
	const format = "zstd decompression error at %d: %v"
	return fmt.Sprintf(format, e.offset, e.err)
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// see through the wrapper.
func (e *zstdError) Unwrap() error { return e.err }
// makeEOFError returns io.ErrUnexpectedEOF wrapped with the input position
// that lies off bytes past r.blockOffset.
func (r *Reader) makeEOFError(off int) error {
	var cause error = io.ErrUnexpectedEOF
	return r.wrapError(off, cause)
}
// wrapNonEOFError wraps err with the input position that lies off bytes
// past r.blockOffset, first turning io.EOF into io.ErrUnexpectedEOF. It is
// used where running out of input means the stream is truncated.
func (r *Reader) wrapNonEOFError(off int, err error) error {
	if err != io.EOF {
		return r.wrapError(off, err)
	}
	return r.wrapError(off, io.ErrUnexpectedEOF)
}
// makeError returns a new error with the text msg, wrapped with the input
// position that lies off bytes past r.blockOffset.
func (r *Reader) makeError(off int, msg string) error {
	cause := errors.New(msg)
	return r.wrapError(off, cause)
}
// wrapError wraps err in a zstdError whose offset is r.blockOffset plus off.
// io.EOF is returned unwrapped so that callers comparing against io.EOF
// directly still see a normal end of stream.
func (r *Reader) wrapError(off int, err error) error {
	if err != io.EOF {
		return &zstdError{
			offset: r.blockOffset + int64(off),
			err:    err,
		}
	}
	return err
}
// The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.