// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package maphash provides hash functions on byte sequences. // These hash functions are intended to be used to implement hash tables or // other data structures that need to map arbitrary strings or byte // sequences to a uniform distribution on unsigned 64-bit integers. // Each different instance of a hash table or data structure should use its own [Seed]. // // The hash functions are not cryptographically secure. // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
package maphash // A Seed is a random value that selects the specific hash function // computed by a [Hash]. If two Hashes use the same Seeds, they // will compute the same hash values for any given input. // If two Hashes use different Seeds, they are very likely to compute // distinct hash values for any given input. // // A Seed must be initialized by calling [MakeSeed]. // The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method. // // Each Seed value is local to a single process and cannot be serialized // or otherwise recreated in a different process. type Seed struct { s uint64 } // Bytes returns the hash of b with the given seed. // // Bytes is equivalent to, but more convenient and efficient than: // // var h Hash // h.SetSeed(seed) // h.Write(b) // return h.Sum64() func ( Seed, []byte) uint64 { := .s if == 0 { panic("maphash: use of uninitialized Seed") } if len() > bufSize { = [:len():len()] // merge len and cap calculations when reslicing for len() > bufSize { = rthash([:bufSize], ) = [bufSize:] } } return rthash(, ) } // String returns the hash of s with the given seed. // // String is equivalent to, but more convenient and efficient than: // // var h Hash // h.SetSeed(seed) // h.WriteString(s) // return h.Sum64() func ( Seed, string) uint64 { := .s if == 0 { panic("maphash: use of uninitialized Seed") } for len() > bufSize { = rthashString([:bufSize], ) = [bufSize:] } return rthashString(, ) } // A Hash computes a seeded hash of a byte sequence. // // The zero Hash is a valid Hash ready to use. // A zero Hash chooses a random seed for itself during // the first call to a Reset, Write, Seed, or Sum64 method. // For control over the seed, use SetSeed. // // The computed hash values depend only on the initial seed and // the sequence of bytes provided to the Hash object, not on the way // in which the bytes are provided. For example, the three sequences // // h.Write([]byte{'f','o','o'}) // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') // h.WriteString("foo") // // all have the same effect. // // Hashes are intended to be collision-resistant, even for situations // where an adversary controls the byte sequences being hashed. // // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. // If multiple goroutines must compute the same seeded hash, // each can declare its own Hash and call SetSeed with a common Seed. type Hash struct { _ [0]func() // not comparable seed Seed // initial seed used for this hash state Seed // current hash of all flushed bytes buf [bufSize]byte // unflushed byte buffer n int // number of unflushed bytes } // bufSize is the size of the Hash write buffer. // The buffer ensures that writes depend only on the sequence of bytes, // not the sequence of WriteByte/Write/WriteString calls, // by always calling rthash with a full buffer (except for the tail). const bufSize = 128 // initSeed seeds the hash if necessary. // initSeed is called lazily before any operation that actually uses h.seed/h.state. // Note that this does not include Write/WriteByte/WriteString in the case // where they only add to h.buf. (If they write too much, they call h.flush, // which does call h.initSeed.) func ( *Hash) () { if .seed.s == 0 { := MakeSeed() .seed = .state = } } // WriteByte adds b to the sequence of bytes hashed by h. // It never fails; the error result is for implementing [io.ByteWriter]. func ( *Hash) ( byte) error { if .n == len(.buf) { .flush() } .buf[.n] = .n++ return nil } // Write adds b to the sequence of bytes hashed by h. // It always writes all of b and never fails; the count and error result are for implementing [io.Writer]. func ( *Hash) ( []byte) (int, error) { := len() // Deal with bytes left over in h.buf. // h.n <= bufSize is always true. // Checking it is ~free and it lets the compiler eliminate a bounds check. if .n > 0 && .n <= bufSize { := copy(.buf[.n:], ) .n += if .n < bufSize { // Copied the entirety of b to h.buf. return , nil } = [:] .flush() // No need to set h.n = 0 here; it happens just before exit. } // Process as many full buffers as possible, without copying, and calling initSeed only once. if len() > bufSize { .initSeed() for len() > bufSize { .state.s = rthash([:bufSize], .state.s) = [bufSize:] } } // Copy the tail. copy(.buf[:], ) .n = len() return , nil } // WriteString adds the bytes of s to the sequence of bytes hashed by h. // It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter]. func ( *Hash) ( string) (int, error) { // WriteString mirrors Write. See Write for comments. := len() if .n > 0 && .n <= bufSize { := copy(.buf[.n:], ) .n += if .n < bufSize { return , nil } = [:] .flush() } if len() > bufSize { .initSeed() for len() > bufSize { .state.s = rthashString([:bufSize], .state.s) = [bufSize:] } } copy(.buf[:], ) .n = len() return , nil } // Seed returns h's seed value. func ( *Hash) () Seed { .initSeed() return .seed } // SetSeed sets h to use seed, which must have been returned by [MakeSeed] // or by another [Hash.Seed] method. // Two [Hash] objects with the same seed behave identically. // Two [Hash] objects with different seeds will very likely behave differently. // Any bytes added to h before this call will be discarded. func ( *Hash) ( Seed) { if .s == 0 { panic("maphash: use of uninitialized Seed") } .seed = .state = .n = 0 } // Reset discards all bytes added to h. // (The seed remains the same.) func ( *Hash) () { .initSeed() .state = .seed .n = 0 } // precondition: buffer is full. func ( *Hash) () { if .n != len(.buf) { panic("maphash: flush of partially full buffer") } .initSeed() .state.s = rthash(.buf[:.n], .state.s) .n = 0 } // Sum64 returns h's current 64-bit value, which depends on // h's seed and the sequence of bytes added to h since the // last call to [Hash.Reset] or [Hash.SetSeed]. // // All bits of the Sum64 result are close to uniformly and // independently distributed, so it can be safely reduced // by using bit masking, shifting, or modular arithmetic. func ( *Hash) () uint64 { .initSeed() return rthash(.buf[:.n], .state.s) } // MakeSeed returns a new random seed. func () Seed { var uint64 for { = randUint64() // We use seed 0 to indicate an uninitialized seed/hash, // so keep trying until we get a non-zero seed. if != 0 { break } } return Seed{s: } } // Sum appends the hash's current 64-bit value to b. // It exists for implementing [hash.Hash]. // For direct calls, it is more efficient to use [Hash.Sum64]. func ( *Hash) ( []byte) []byte { := .Sum64() return append(, byte(>>0), byte(>>8), byte(>>16), byte(>>24), byte(>>32), byte(>>40), byte(>>48), byte(>>56)) } // Size returns h's hash value size, 8 bytes. func ( *Hash) () int { return 8 } // BlockSize returns h's block size. func ( *Hash) () int { return len(.buf) }