Source File
huffman.go
Belonging Package
compress/bzip2
// Copyright 2011 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package bzip2import ()// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a// symbol.type huffmanTree struct {// nodes contains all the non-leaf nodes in the tree. nodes[0] is the// root of the tree and nextNode contains the index of the next element// of nodes to use when the tree is being constructed.nodes []huffmanNodenextNode int}// A huffmanNode is a node in the tree. left and right contain indexes into the// nodes slice of the tree. If left or right is invalidNodeValue then the child// is a left node and its value is in leftValue/rightValue.//// The symbols are uint16s because bzip2 encodes not only MTF indexes in the// tree, but also two magic values for run-length encoding and an EOF symbol.// Thus there are more than 256 possible symbols.type huffmanNode struct {left, right uint16leftValue, rightValue uint16}// invalidNodeValue is an invalid index which marks a leaf node in the tree.const invalidNodeValue = 0xffff// Decode reads bits from the given bitReader and navigates the tree until a// symbol is found.func ( *huffmanTree) ( *bitReader) ( uint16) {:= uint16(0) // node 0 is the root of the tree.for {:= &.nodes[]var uint16if .bits > 0 {// Get next bit - fast path..bits--= uint16(.n>>(.bits&63)) & 1} else {// Get next bit - slow path.// Use ReadBits to retrieve a single bit// from the underling io.ByteReader.= uint16(.ReadBits(1))}// Trick a compiler into generating conditional move instead of branch,// by making both loads unconditional., := .left, .rightif == 1 {=} else {=}if == invalidNodeValue {// We found a leaf. Use the value of bit to decide// whether is a left or a right value., := .leftValue, .rightValueif == 1 {=} else {=}return}}}// newHuffmanTree builds a Huffman tree from a slice containing the code// lengths of each symbol. The maximum code length is 32 bits.func newHuffmanTree( []uint8) (huffmanTree, error) {// There are many possible trees that assign the same code length to// each symbol (consider reflecting a tree down the middle, for// example). Since the code length assignments determine the// efficiency of the tree, each of these trees is equally good. In// order to minimize the amount of information needed to build a tree// bzip2 uses a canonical tree so that it can be reconstructed given// only the code length assignments.if len() < 2 {panic("newHuffmanTree: too few symbols")}var huffmanTree// First we sort the code length assignments by ascending code length,// using the symbol value to break ties.:= make([]huffmanSymbolLengthPair, len())for , := range {[].value = uint16()[].length =}slices.SortFunc(, func(, huffmanSymbolLengthPair) int {if := cmp.Compare(.length, .length); != 0 {return}return cmp.Compare(.value, .value)})// Now we assign codes to the symbols, starting with the longest code.// We keep the codes packed into a uint32, at the most-significant end.// So branches are taken from the MSB downwards. This makes it easy to// sort them later.:= uint32(0):= uint8(32):= make([]huffmanCode, len())for := len() - 1; >= 0; -- {if > [].length {= [].length}[].code =[].codeLen =[].value = [].value// We need to 'increment' the code, which means treating |code|// like a |length| bit number.+= 1 << (32 - )}// Now we can sort by the code so that the left half of each branch are// grouped together, recursively.slices.SortFunc(, func(, huffmanCode) int {return cmp.Compare(.code, .code)}).nodes = make([]huffmanNode, len()), := buildHuffmanNode(&, , 0)return ,}// huffmanSymbolLengthPair contains a symbol and its code length.type huffmanSymbolLengthPair struct {value uint16length uint8}// huffmanCode contains a symbol, its code and code length.type huffmanCode struct {code uint32codeLen uint8value uint16}// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in// the Huffman tree at the given level. It returns the index of the newly// constructed node.func buildHuffmanNode( *huffmanTree, []huffmanCode, uint32) ( uint16, error) {:= uint32(1) << (31 - )// We have to search the list of codes to find the divide between the left and right sides.:= len()for , := range {if .code& != 0 {=break}}:= [:]:= [:]if len() == 0 || len() == 0 {// There is a superfluous level in the Huffman tree indicating// a bug in the encoder. However, this bug has been observed in// the wild so we handle it.// If this function was called recursively then we know that// len(codes) >= 2 because, otherwise, we would have hit the// "leaf node" case, below, and not recurred.//// However, for the initial call it's possible that len(codes)// is zero or one. Both cases are invalid because a zero length// tree cannot encode anything and a length-1 tree can only// encode EOF and so is superfluous. We reject both.if len() < 2 {return 0, StructuralError("empty Huffman tree")}// In this case the recursion doesn't always reduce the length// of codes so we need to ensure termination via another// mechanism.if == 31 {// Since len(codes) >= 2 the only way that the values// can match at all 32 bits is if they are equal, which// is invalid. This ensures that we never enter// infinite recursion.return 0, StructuralError("equal symbols in Huffman tree")}if len() == 0 {return (, , +1)}return (, , +1)}= uint16(.nextNode):= &.nodes[.nextNode].nextNode++if len() == 1 {// leaf node.left = invalidNodeValue.leftValue = [0].value} else {.left, = (, , +1)}if != nil {return}if len() == 1 {// leaf node.right = invalidNodeValue.rightValue = [0].value} else {.right, = (, , +1)}return}
![]() |
The pages are generated with Golds v0.7.9-preview. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |