Source File
huffman.go
Belonging Package
compress/bzip2
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bzip2
import (
)
// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
// symbol.
type huffmanTree struct {
// nodes contains all the non-leaf nodes in the tree. nodes[0] is the
// root of the tree and nextNode contains the index of the next element
// of nodes to use when the tree is being constructed.
nodes []huffmanNode
nextNode int
}
// A huffmanNode is a node in the tree. left and right contain indexes into the
// nodes slice of the tree. If left or right is invalidNodeValue then the child
// is a left node and its value is in leftValue/rightValue.
//
// The symbols are uint16s because bzip2 encodes not only MTF indexes in the
// tree, but also two magic values for run-length encoding and an EOF symbol.
// Thus there are more than 256 possible symbols.
type huffmanNode struct {
left, right uint16
leftValue, rightValue uint16
}
// invalidNodeValue is an invalid index which marks a leaf node in the tree.
const invalidNodeValue = 0xffff
// Decode reads bits from the given bitReader and navigates the tree until a
// symbol is found.
func ( *huffmanTree) ( *bitReader) ( uint16) {
:= uint16(0) // node 0 is the root of the tree.
for {
:= &.nodes[]
var uint16
if .bits > 0 {
// Get next bit - fast path.
.bits--
= uint16(.n>>(.bits&63)) & 1
} else {
// Get next bit - slow path.
// Use ReadBits to retrieve a single bit
// from the underling io.ByteReader.
= uint16(.ReadBits(1))
}
// Trick a compiler into generating conditional move instead of branch,
// by making both loads unconditional.
, := .left, .right
if == 1 {
=
} else {
=
}
if == invalidNodeValue {
// We found a leaf. Use the value of bit to decide
// whether is a left or a right value.
, := .leftValue, .rightValue
if == 1 {
=
} else {
=
}
return
}
}
}
// newHuffmanTree builds a Huffman tree from a slice containing the code
// lengths of each symbol. The maximum code length is 32 bits.
func newHuffmanTree( []uint8) (huffmanTree, error) {
// There are many possible trees that assign the same code length to
// each symbol (consider reflecting a tree down the middle, for
// example). Since the code length assignments determine the
// efficiency of the tree, each of these trees is equally good. In
// order to minimize the amount of information needed to build a tree
// bzip2 uses a canonical tree so that it can be reconstructed given
// only the code length assignments.
if len() < 2 {
panic("newHuffmanTree: too few symbols")
}
var huffmanTree
// First we sort the code length assignments by ascending code length,
// using the symbol value to break ties.
:= make([]huffmanSymbolLengthPair, len())
for , := range {
[].value = uint16()
[].length =
}
slices.SortFunc(, func(, huffmanSymbolLengthPair) int {
if := cmp.Compare(.length, .length); != 0 {
return
}
return cmp.Compare(.value, .value)
})
// Now we assign codes to the symbols, starting with the longest code.
// We keep the codes packed into a uint32, at the most-significant end.
// So branches are taken from the MSB downwards. This makes it easy to
// sort them later.
:= uint32(0)
:= uint8(32)
:= make([]huffmanCode, len())
for := len() - 1; >= 0; -- {
if > [].length {
= [].length
}
[].code =
[].codeLen =
[].value = [].value
// We need to 'increment' the code, which means treating |code|
// like a |length| bit number.
+= 1 << (32 - )
}
// Now we can sort by the code so that the left half of each branch are
// grouped together, recursively.
slices.SortFunc(, func(, huffmanCode) int {
return cmp.Compare(.code, .code)
})
.nodes = make([]huffmanNode, len())
, := buildHuffmanNode(&, , 0)
return ,
}
// huffmanSymbolLengthPair contains a symbol and its code length.
type huffmanSymbolLengthPair struct {
value uint16
length uint8
}
// huffmanCode contains a symbol, its code and code length.
type huffmanCode struct {
code uint32
codeLen uint8
value uint16
}
// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
// the Huffman tree at the given level. It returns the index of the newly
// constructed node.
func buildHuffmanNode( *huffmanTree, []huffmanCode, uint32) ( uint16, error) {
:= uint32(1) << (31 - )
// We have to search the list of codes to find the divide between the left and right sides.
:= len()
for , := range {
if .code& != 0 {
=
break
}
}
:= [:]
:= [:]
if len() == 0 || len() == 0 {
// There is a superfluous level in the Huffman tree indicating
// a bug in the encoder. However, this bug has been observed in
// the wild so we handle it.
// If this function was called recursively then we know that
// len(codes) >= 2 because, otherwise, we would have hit the
// "leaf node" case, below, and not recurred.
//
// However, for the initial call it's possible that len(codes)
// is zero or one. Both cases are invalid because a zero length
// tree cannot encode anything and a length-1 tree can only
// encode EOF and so is superfluous. We reject both.
if len() < 2 {
return 0, StructuralError("empty Huffman tree")
}
// In this case the recursion doesn't always reduce the length
// of codes so we need to ensure termination via another
// mechanism.
if == 31 {
// Since len(codes) >= 2 the only way that the values
// can match at all 32 bits is if they are equal, which
// is invalid. This ensures that we never enter
// infinite recursion.
return 0, StructuralError("equal symbols in Huffman tree")
}
if len() == 0 {
return (, , +1)
}
return (, , +1)
}
= uint16(.nextNode)
:= &.nodes[.nextNode]
.nextNode++
if len() == 1 {
// leaf node
.left = invalidNodeValue
.leftValue = [0].value
} else {
.left, = (, , +1)
}
if != nil {
return
}
if len() == 1 {
// leaf node
.right = invalidNodeValue
.rightValue = [0].value
} else {
.right, = (, , +1)
}
return
}
The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |