Source File
maphash.go
Belonging Package
hash/maphash
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package maphash provides hash functions on byte sequences and comparable values.
// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution on unsigned 64-bit integers.
// Each different instance of a hash table or data structure should use its own [Seed].
//
// The hash functions are not cryptographically secure.
// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
package maphash
import (
)
// A Seed is a random value that selects the specific hash function
// computed by a [Hash]. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling [MakeSeed].
// The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
s uint64
}
// Bytes returns the hash of b with the given seed.
//
// Bytes is equivalent to, but more convenient and efficient than:
//
// var h Hash
// h.SetSeed(seed)
// h.Write(b)
// return h.Sum64()
func ( Seed, []byte) uint64 {
:= .s
if == 0 {
panic("maphash: use of uninitialized Seed")
}
if len() > bufSize {
= [:len():len()] // merge len and cap calculations when reslicing
for len() > bufSize {
= rthash([:bufSize], )
= [bufSize:]
}
}
return rthash(, )
}
// String returns the hash of s with the given seed.
//
// String is equivalent to, but more convenient and efficient than:
//
// var h Hash
// h.SetSeed(seed)
// h.WriteString(s)
// return h.Sum64()
func ( Seed, string) uint64 {
:= .s
if == 0 {
panic("maphash: use of uninitialized Seed")
}
for len() > bufSize {
= rthashString([:bufSize], )
= [bufSize:]
}
return rthashString(, )
}
// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself during
// the first call to a Reset, Write, Seed, or Sum64 method.
// For control over the seed, use SetSeed.
//
// The computed hash values depend only on the initial seed and
// the sequence of bytes provided to the Hash object, not on the way
// in which the bytes are provided. For example, the three sequences
//
// h.Write([]byte{'f','o','o'})
// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
// h.WriteString("foo")
//
// all have the same effect.
//
// Hashes are intended to be collision-resistant, even for situations
// where an adversary controls the byte sequences being hashed.
//
// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
// If multiple goroutines must compute the same seeded hash,
// each can declare its own Hash and call SetSeed with a common Seed.
type Hash struct {
_ [0]func() // not comparable
seed Seed // initial seed used for this hash
state Seed // current hash of all flushed bytes
buf [bufSize]byte // unflushed byte buffer
n int // number of unflushed bytes
}
// bufSize is the size of the Hash write buffer.
// The buffer ensures that writes depend only on the sequence of bytes,
// not the sequence of WriteByte/Write/WriteString calls,
// by always calling rthash with a full buffer (except for the tail).
const bufSize = 128
// initSeed seeds the hash if necessary.
// initSeed is called lazily before any operation that actually uses h.seed/h.state.
// Note that this does not include Write/WriteByte/WriteString in the case
// where they only add to h.buf. (If they write too much, they call h.flush,
// which does call h.initSeed.)
func ( *Hash) () {
if .seed.s == 0 {
:= MakeSeed()
.seed =
.state =
}
}
// WriteByte adds b to the sequence of bytes hashed by h.
// It never fails; the error result is for implementing [io.ByteWriter].
func ( *Hash) ( byte) error {
if .n == len(.buf) {
.flush()
}
.buf[.n] =
.n++
return nil
}
// Write adds b to the sequence of bytes hashed by h.
// It always writes all of b and never fails; the count and error result are for implementing [io.Writer].
func ( *Hash) ( []byte) (int, error) {
:= len()
// Deal with bytes left over in h.buf.
// h.n <= bufSize is always true.
// Checking it is ~free and it lets the compiler eliminate a bounds check.
if .n > 0 && .n <= bufSize {
:= copy(.buf[.n:], )
.n +=
if .n < bufSize {
// Copied the entirety of b to h.buf.
return , nil
}
= [:]
.flush()
// No need to set h.n = 0 here; it happens just before exit.
}
// Process as many full buffers as possible, without copying, and calling initSeed only once.
if len() > bufSize {
.initSeed()
for len() > bufSize {
.state.s = rthash([:bufSize], .state.s)
= [bufSize:]
}
}
// Copy the tail.
copy(.buf[:], )
.n = len()
return , nil
}
// WriteString adds the bytes of s to the sequence of bytes hashed by h.
// It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter].
func ( *Hash) ( string) (int, error) {
// WriteString mirrors Write. See Write for comments.
:= len()
if .n > 0 && .n <= bufSize {
:= copy(.buf[.n:], )
.n +=
if .n < bufSize {
return , nil
}
= [:]
.flush()
}
if len() > bufSize {
.initSeed()
for len() > bufSize {
.state.s = rthashString([:bufSize], .state.s)
= [bufSize:]
}
}
copy(.buf[:], )
.n = len()
return , nil
}
// Seed returns h's seed value.
func ( *Hash) () Seed {
.initSeed()
return .seed
}
// SetSeed sets h to use seed, which must have been returned by [MakeSeed]
// or by another [Hash.Seed] method.
// Two [Hash] objects with the same seed behave identically.
// Two [Hash] objects with different seeds will very likely behave differently.
// Any bytes added to h before this call will be discarded.
func ( *Hash) ( Seed) {
if .s == 0 {
panic("maphash: use of uninitialized Seed")
}
.seed =
.state =
.n = 0
}
// Reset discards all bytes added to h.
// (The seed remains the same.)
func ( *Hash) () {
.initSeed()
.state = .seed
.n = 0
}
// precondition: buffer is full.
func ( *Hash) () {
if .n != len(.buf) {
panic("maphash: flush of partially full buffer")
}
.initSeed()
.state.s = rthash(.buf[:.n], .state.s)
.n = 0
}
// Sum64 returns h's current 64-bit value, which depends on
// h's seed and the sequence of bytes added to h since the
// last call to [Hash.Reset] or [Hash.SetSeed].
//
// All bits of the Sum64 result are close to uniformly and
// independently distributed, so it can be safely reduced
// by using bit masking, shifting, or modular arithmetic.
func ( *Hash) () uint64 {
.initSeed()
return rthash(.buf[:.n], .state.s)
}
// MakeSeed returns a new random seed.
func () Seed {
var uint64
for {
= randUint64()
// We use seed 0 to indicate an uninitialized seed/hash,
// so keep trying until we get a non-zero seed.
if != 0 {
break
}
}
return Seed{s: }
}
// Sum appends the hash's current 64-bit value to b.
// It exists for implementing [hash.Hash].
// For direct calls, it is more efficient to use [Hash.Sum64].
func ( *Hash) ( []byte) []byte {
:= .Sum64()
return append(,
byte(>>0),
byte(>>8),
byte(>>16),
byte(>>24),
byte(>>32),
byte(>>40),
byte(>>48),
byte(>>56))
}
// Size returns h's hash value size, 8 bytes.
func ( *Hash) () int { return 8 }
// BlockSize returns h's block size.
func ( *Hash) () int { return len(.buf) }
// Comparable returns the hash of comparable value v with the given seed
// such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
// If v != v, then the resulting hash is randomly distributed.
func [ comparable]( Seed, ) uint64 {
escapeForHash()
return comparableHash(, )
}
// escapeForHash forces v to be on the heap, if v contains a
// non-string pointer. We cannot hash pointers to local variables,
// as the address of the local variable might change on stack growth.
// Strings are okay as the hash depends on only the content, not
// the pointer.
//
// This is essentially
//
// if hasNonStringPointers(T) { abi.Escape(v) }
//
// Implemented as a compiler intrinsic.
func escapeForHash[ comparable]( ) { panic("intrinsic") }
// WriteComparable adds x to the data hashed by h.
func [ comparable]( *Hash, ) {
escapeForHash()
// writeComparable (not in purego mode) directly operates on h.state
// without using h.buf. Mix in the buffer length so it won't
// commute with a buffered write, which either changes h.n or changes
// h.state.
if .n != 0 {
writeComparable(, .n)
}
writeComparable(, )
}
func ( *Hash) ( float64) {
if == 0 {
.WriteByte(0)
return
}
var [8]byte
if != {
byteorder.LEPutUint64([:], randUint64())
.Write([:])
return
}
byteorder.LEPutUint64([:], math.Float64bits())
.Write([:])
}
func btoi( bool) byte {
if {
return 1
}
return 0
}
The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |