// Source file: chacha8_generic.go
// Belonging package: internal/chacha8rand

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// ChaCha8 is ChaCha with 8 rounds.
// See https://cr.yp.to/chacha/chacha-20080128.pdf.
//
// ChaCha8 operates on a 4x4 matrix of uint32 values, initially set to:
//
//	const1 const2 const3 const4
//	seed   seed   seed   seed
//	seed   seed   seed   seed
//	counter64     0      0
//
// We use the same constants as ChaCha20 does, a random seed,
// and a counter. Running ChaCha8 on this input produces
// a 4x4 matrix of pseudo-random values with as much entropy
// as the seed.
//
// Given SIMD registers that can hold N uint32s, it is possible
// to run N ChaCha8 block transformations in parallel by filling
// the first register with the N copies of const1, the second
// with N copies of const2, and so on, and then running the operations.
//
// Each iteration of ChaCha8Rand operates over 32 bytes of input and
// produces 992 bytes of RNG output, plus 32 bytes of input for the next
// iteration.
//
// The 32 bytes of input are used as a ChaCha8 key, with a zero nonce, to
// produce 1024 bytes of output (16 blocks, with counters 0 to 15).
// First, for each block, the values 0x61707865, 0x3320646e, 0x79622d32,
// 0x6b206574 are subtracted from the 32-bit little-endian words at
// position 0, 1, 2, and 3 respectively, and an increasing counter
// starting at zero is subtracted from each word at position 12. Then,
// this stream is permuted such that for each sequence of four blocks,
// first we output the first four bytes of each block, then the next four
// bytes of each block, and so on. Finally, the last 32 bytes of output
// are used as the input of the next iteration, and the remaining 992
// bytes are the RNG output.
//
// See https://c2sp.org/chacha8rand for additional details.
//
// Normal ChaCha20 implementations for encryption use this same
// parallelism but then have to deinterlace the results so that
// it appears the blocks were generated separately. For the purposes
// of generating random numbers, the interlacing is fine.
// We are simply locked in to preserving the 4-way interlacing
// in any future optimizations.
package chacha8rand
import (
	"internal/goarch"
	"unsafe"
)
// setup sets up 4 ChaCha8 blocks in b32 with the counter and seed.
// Note that b32 is [16][4]uint32 not [4][16]uint32: the blocks are interlaced
// the same way they would be in a 4-way SIMD implementations.
func setup( *[4]uint64, *[16][4]uint32, uint32) {
// Convert to uint64 to do half as many stores to memory.
:= (*[16][2]uint64)(unsafe.Pointer())
// Constants; same as in ChaCha20: "expand 32-byte k"
[0][0] = 0x61707865_61707865
[0][1] = 0x61707865_61707865
[1][0] = 0x3320646e_3320646e
[1][1] = 0x3320646e_3320646e
[2][0] = 0x79622d32_79622d32
[2][1] = 0x79622d32_79622d32
[3][0] = 0x6b206574_6b206574
[3][1] = 0x6b206574_6b206574
// Seed values.
var uint64
var uint32
= uint32([0])
= uint64()<<32 | uint64()
[4][0] =
[4][1] =
= uint32([0] >> 32)
= uint64()<<32 | uint64()
[5][0] =
[5][1] =
= uint32([1])
= uint64()<<32 | uint64()
[6][0] =
[6][1] =
= uint32([1] >> 32)
= uint64()<<32 | uint64()
[7][0] =
[7][1] =
= uint32([2])
= uint64()<<32 | uint64()
[8][0] =
[8][1] =
= uint32([2] >> 32)
= uint64()<<32 | uint64()
[9][0] =
[9][1] =
= uint32([3])
= uint64()<<32 | uint64()
[10][0] =
[10][1] =
= uint32([3] >> 32)
= uint64()<<32 | uint64()
[11][0] =
[11][1] =
// Counters.
if goarch.BigEndian {
[12][0] = uint64(+0)<<32 | uint64(+1)
[12][1] = uint64(+2)<<32 | uint64(+3)
} else {
[12][0] = uint64(+0) | uint64(+1)<<32
[12][1] = uint64(+2) | uint64(+3)<<32
}
// Zeros.
[13][0] = 0
[13][1] = 0
[14][0] = 0
[14][1] = 0
[15][0] = 0
[15][1] = 0
}
func _() {
// block and block_generic must have same type
:= block
= block_generic
_ =
}
// block_generic is the non-assembly block implementation,
// for use on systems without special assembly.
// Even on such systems, it is quite fast: on GOOS=386,
// ChaCha8 using this code generates random values faster than PCG-DXSM.
func block_generic( *[4]uint64, *[32]uint64, uint32) {
:= (*[16][4]uint32)(unsafe.Pointer())
setup(, , )
for := range [0] {
// Load block i from b[*][i] into local variables.
:= [0][]
:= [1][]
:= [2][]
:= [3][]
:= [4][]
:= [5][]
:= [6][]
:= [7][]
:= [8][]
:= [9][]
:= [10][]
:= [11][]
:= [12][]
:= [13][]
:= [14][]
:= [15][]
// 4 iterations of eight quarter-rounds each is 8 rounds
for := 0; < 4; ++ {
, , , = qr(, , , )
, , , = qr(, , , )
, , , = qr(, , , )
, , , = qr(, , , )
, , , = qr(, , , )
, , , = qr(, , , )
, , , = qr(, , , )
, , , = qr(, , , )
}
// Store block i back into b[*][i].
// Add b4..b11 back to the original key material,
// like in ChaCha20, to avoid trivial invertibility.
// There is no entropy in b0..b3 and b12..b15
// so we can skip the additions and save some time.
[0][] =
[1][] =
[2][] =
[3][] =
[4][] +=
[5][] +=
[6][] +=
[7][] +=
[8][] +=
[9][] +=
[10][] +=
[11][] +=
[12][] =
[13][] =
[14][] =
[15][] =
}
if goarch.BigEndian {
// On a big-endian system, reading the uint32 pairs as uint64s
// will word-swap them compared to little-endian, so we word-swap
// them here first to make the next swap get the right answer.
for , := range {
[] = >>32 | <<32
}
}
}
// qr is the (inlinable) ChaCha8 quarter round.
//
// It mixes the four state words a, b, c, d with the standard ChaCha
// add-xor-rotate sequence (left rotations by 16, 12, 8, and 7 bits) and
// returns the updated words in the same order. All arithmetic is modulo 2^32.
func qr(a, b, c, d uint32) (_a, _b, _c, _d uint32) {
	a += b
	d ^= a
	d = d<<16 | d>>16
	c += d
	b ^= c
	b = b<<12 | b>>20
	a += b
	d ^= a
	d = d<<8 | d>>24
	c += d
	b ^= c
	b = b<<7 | b>>25
	return a, b, c, d
}
// The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.