// Copyright 2024 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package mlkemimport ()// fieldElement is an integer modulo q, an element of ℤ_q. It is always reduced.type fieldElement uint16// fieldCheckReduced checks that a value a is < q.func fieldCheckReduced( uint16) (fieldElement, error) {if >= q {return0, errors.New("unreduced field element") }returnfieldElement(), nil}// fieldReduceOnce reduces a value a < 2q.func fieldReduceOnce( uint16) fieldElement { := - q// If x underflowed, then x >= 2¹⁶ - q > 2¹⁵, so the top bit is set. += ( >> 15) * qreturnfieldElement()}func fieldAdd(, fieldElement) fieldElement { := uint16( + )returnfieldReduceOnce()}func fieldSub(, fieldElement) fieldElement { := uint16( - + q)returnfieldReduceOnce()}const ( barrettMultiplier = 5039// 2¹² * 2¹² / q barrettShift = 24// log₂(2¹² * 2¹²))// fieldReduce reduces a value a < 2q² using Barrett reduction, to avoid// potentially variable-time division.func fieldReduce( uint32) fieldElement { := uint32((uint64() * barrettMultiplier) >> barrettShift)returnfieldReduceOnce(uint16( - *q))}func fieldMul(, fieldElement) fieldElement { := uint32() * uint32()returnfieldReduce()}// fieldMulSub returns a * (b - c). This operation is fused to save a// fieldReduceOnce after the subtraction.func fieldMulSub(, , fieldElement) fieldElement { := uint32() * uint32(-+q)returnfieldReduce()}// fieldAddMul returns a * b + c * d. 
This operation is fused to save a// fieldReduceOnce and a fieldReduce.func fieldAddMul(, , , fieldElement) fieldElement { := uint32() * uint32() += uint32() * uint32()returnfieldReduce()}// compress maps a field element uniformly to the range 0 to 2ᵈ-1, according to// FIPS 203, Definition 4.7.func compress( fieldElement, uint8) uint16 {// We want to compute (x * 2ᵈ) / q, rounded to nearest integer, with 1/2 // rounding up (see FIPS 203, Section 2.3).// Barrett reduction produces a quotient and a remainder in the range [0, 2q), // such that dividend = quotient * q + remainder. := uint32() << // x * 2ᵈ := uint32(uint64() * barrettMultiplier >> barrettShift) := - *q// Since the remainder is in the range [0, 2q), not [0, q), we need to // portion it into three spans for rounding. // // [ 0, q/2 ) -> round to 0 // [ q/2, q + q/2 ) -> round to 1 // [ q + q/2, 2q ) -> round to 2 // // We can convert that to the following logic: add 1 if remainder > q/2, // then add 1 again if remainder > q + q/2. // // Note that if remainder > x, then ⌊x⌋ - remainder underflows, and the top // bit of the difference will be set. += (q/2 - ) >> 31 & 1 += (q + q/2 - ) >> 31 & 1// quotient might have overflowed at this point, so reduce it by masking.varuint32 = (1 << ) - 1returnuint16( & )}// decompress maps a number x between 0 and 2ᵈ-1 uniformly to the full range of// field elements, according to FIPS 203, Definition 4.8.func decompress( uint16, uint8) fieldElement {// We want to compute (y * q) / 2ᵈ, rounded to nearest integer, with 1/2 // rounding up (see FIPS 203, Section 2.3). := uint32() * q := >> // (y * q) / 2ᵈ// The d'th least-significant bit of the dividend (the most significant bit // of the remainder) is 1 for the top half of the values that divide to the // same quotient, which are the ones that round up. 
+= >> ( - 1) & 1// quotient is at most (2¹¹-1) * q / 2¹¹ + 1 = 3328, so it didn't overflow.returnfieldElement()}// ringElement is a polynomial, an element of R_q, represented as an array// according to FIPS 203, Section 2.4.4.type ringElement [n]fieldElement// polyAdd adds two ringElements or nttElements.func polyAdd[ ~[n]fieldElement](, ) ( ) {for := range { [] = fieldAdd([], []) }return}// polySub subtracts two ringElements or nttElements.func polySub[ ~[n]fieldElement](, ) ( ) {for := range { [] = fieldSub([], []) }return}// polyByteEncode appends the 384-byte encoding of f to b.//// It implements ByteEncode₁₂, according to FIPS 203, Algorithm 5.func polyByteEncode[ ~[n]fieldElement]( []byte, ) []byte { , := sliceForAppend(, encodingSize12)for := 0; < n; += 2 { := uint32([]) | uint32([+1])<<12 [0] = uint8() [1] = uint8( >> 8) [2] = uint8( >> 16) = [3:] }return}// polyByteDecode decodes the 384-byte encoding of a polynomial, checking that// all the coefficients are properly reduced. This fulfills the "Modulus check"// step of ML-KEM Encapsulation.//// It implements ByteDecode₁₂, according to FIPS 203, Algorithm 6.func polyByteDecode[ ~[n]fieldElement]( []byte) (, error) {iflen() != encodingSize12 {return {}, errors.New("mlkem: invalid encoding length") }varfor := 0; < n; += 2 { := uint32([0]) | uint32([1])<<8 | uint32([2])<<16const = 0b1111_1111_1111varerrorif [], = fieldCheckReduced(uint16( & )); != nil {return {}, errors.New("mlkem: invalid polynomial encoding") }if [+1], = fieldCheckReduced(uint16( >> 12)); != nil {return {}, errors.New("mlkem: invalid polynomial encoding") } = [3:] }return , nil}// sliceForAppend takes a slice and a requested number of bytes. It returns a// slice with the contents of the given slice followed by that many bytes and a// second slice that aliases into it and contains only the extra bytes. 
If the// original slice has sufficient capacity then no allocation is performed.func sliceForAppend( []byte, int) (, []byte) {if := len() + ; cap() >= { = [:] } else { = make([]byte, )copy(, ) } = [len():]return}// ringCompressAndEncode1 appends a 32-byte encoding of a ring element to s,// compressing one coefficients per bit.//// It implements Compress₁, according to FIPS 203, Definition 4.7,// followed by ByteEncode₁, according to FIPS 203, Algorithm 5.func ringCompressAndEncode1( []byte, ringElement) []byte { , := sliceForAppend(, encodingSize1)for := range { [] = 0 }for := range { [/8] |= uint8(compress([], 1) << ( % 8)) }return}// ringDecodeAndDecompress1 decodes a 32-byte slice to a ring element where each// bit is mapped to 0 or ⌈q/2⌋.//// It implements ByteDecode₁, according to FIPS 203, Algorithm 6,// followed by Decompress₁, according to FIPS 203, Definition 4.8.func ringDecodeAndDecompress1( *[encodingSize1]byte) ringElement {varringElementfor := range { := [/8] >> ( % 8) & 1const = (q + 1) / 2// ⌈q/2⌋, rounded up per FIPS 203, Section 2.3 [] = fieldElement() * // 0 decompresses to 0, and 1 to ⌈q/2⌋ }return}// ringCompressAndEncode4 appends a 128-byte encoding of a ring element to s,// compressing two coefficients per byte.//// It implements Compress₄, according to FIPS 203, Definition 4.7,// followed by ByteEncode₄, according to FIPS 203, Algorithm 5.func ringCompressAndEncode4( []byte, ringElement) []byte { , := sliceForAppend(, encodingSize4)for := 0; < n; += 2 { [/2] = uint8(compress([], 4) | compress([+1], 4)<<4) }return}// ringDecodeAndDecompress4 decodes a 128-byte encoding of a ring element where// each four bits are mapped to an equidistant distribution.//// It implements ByteDecode₄, according to FIPS 203, Algorithm 6,// followed by Decompress₄, according to FIPS 203, Definition 4.8.func ringDecodeAndDecompress4( *[encodingSize4]byte) ringElement {varringElementfor := 0; < n; += 2 { [] = fieldElement(decompress(uint16([/2]&0b1111), 4)) [+1] = 
fieldElement(decompress(uint16([/2]>>4), 4)) }return}// ringCompressAndEncode10 appends a 320-byte encoding of a ring element to s,// compressing four coefficients per five bytes.//// It implements Compress₁₀, according to FIPS 203, Definition 4.7,// followed by ByteEncode₁₀, according to FIPS 203, Algorithm 5.func ringCompressAndEncode10( []byte, ringElement) []byte { , := sliceForAppend(, encodingSize10)for := 0; < n; += 4 {varuint64 |= uint64(compress([], 10)) |= uint64(compress([+1], 10)) << 10 |= uint64(compress([+2], 10)) << 20 |= uint64(compress([+3], 10)) << 30 [0] = uint8() [1] = uint8( >> 8) [2] = uint8( >> 16) [3] = uint8( >> 24) [4] = uint8( >> 32) = [5:] }return}// ringDecodeAndDecompress10 decodes a 320-byte encoding of a ring element where// each ten bits are mapped to an equidistant distribution.//// It implements ByteDecode₁₀, according to FIPS 203, Algorithm 6,// followed by Decompress₁₀, according to FIPS 203, Definition 4.8.func ringDecodeAndDecompress10( *[encodingSize10]byte) ringElement { := [:]varringElementfor := 0; < n; += 4 { := uint64([0]) | uint64([1])<<8 | uint64([2])<<16 | uint64([3])<<24 | uint64([4])<<32 = [5:] [] = fieldElement(decompress(uint16(>>0&0b11_1111_1111), 10)) [+1] = fieldElement(decompress(uint16(>>10&0b11_1111_1111), 10)) [+2] = fieldElement(decompress(uint16(>>20&0b11_1111_1111), 10)) [+3] = fieldElement(decompress(uint16(>>30&0b11_1111_1111), 10)) }return}// ringCompressAndEncode appends an encoding of a ring element to s,// compressing each coefficient to d bits.//// It implements Compress, according to FIPS 203, Definition 4.7,// followed by ByteEncode, according to FIPS 203, Algorithm 5.func ringCompressAndEncode( []byte, ringElement, uint8) []byte {varbytevaruint8for := 0; < n; ++ { := compress([], )varuint8for < { |= byte(>>) << := min(8-, -) += += if == 8 { = append(, ) = 0 = 0 } } }if != 0 {panic("mlkem: internal error: bitsFilled != 0") }return}// ringDecodeAndDecompress decodes an encoding of a ring element 
where// each d bits are mapped to an equidistant distribution.//// It implements ByteDecode, according to FIPS 203, Algorithm 6,// followed by Decompress, according to FIPS 203, Definition 4.8.func ringDecodeAndDecompress( []byte, uint8) ringElement {varringElementvaruint8for := 0; < n; ++ {varuint16varuint8for < { |= uint16([0]>>) << &= (1 << ) - 1 := min(8-, -) += += if == 8 { = [1:] = 0 } } [] = fieldElement(decompress(, )) }iflen() != 0 {panic("mlkem: internal error: leftover bytes") }return}// ringCompressAndEncode5 appends a 160-byte encoding of a ring element to s,// compressing eight coefficients per five bytes.//// It implements Compress₅, according to FIPS 203, Definition 4.7,// followed by ByteEncode₅, according to FIPS 203, Algorithm 5.func ringCompressAndEncode5( []byte, ringElement) []byte {returnringCompressAndEncode(, , 5)}// ringDecodeAndDecompress5 decodes a 160-byte encoding of a ring element where// each five bits are mapped to an equidistant distribution.//// It implements ByteDecode₅, according to FIPS 203, Algorithm 6,// followed by Decompress₅, according to FIPS 203, Definition 4.8.func ringDecodeAndDecompress5( *[encodingSize5]byte) ringElement {returnringDecodeAndDecompress([:], 5)}// ringCompressAndEncode11 appends a 352-byte encoding of a ring element to s,// compressing eight coefficients per eleven bytes.//// It implements Compress₁₁, according to FIPS 203, Definition 4.7,// followed by ByteEncode₁₁, according to FIPS 203, Algorithm 5.func ringCompressAndEncode11( []byte, ringElement) []byte {returnringCompressAndEncode(, , 11)}// ringDecodeAndDecompress11 decodes a 352-byte encoding of a ring element where// each eleven bits are mapped to an equidistant distribution.//// It implements ByteDecode₁₁, according to FIPS 203, Algorithm 6,// followed by Decompress₁₁, according to FIPS 203, Definition 4.8.func ringDecodeAndDecompress11( *[encodingSize11]byte) ringElement {returnringDecodeAndDecompress([:], 11)}// samplePolyCBD draws a 
ringElement from the special Dη distribution given a// stream of random bytes generated by the PRF function, according to FIPS 203,// Algorithm 8 and Definition 4.3.func samplePolyCBD( []byte, byte) ringElement { := sha3.NewShake256() .Write() .Write([]byte{}) := make([]byte, 64*2) // η = 2 .Read()// SamplePolyCBD simply draws four (2η) bits for each coefficient, and adds // the first two and subtracts the last two.varringElementfor := 0; < n; += 2 { := [/2] , , , := >>7, >>6&1, >>5&1, >>4&1 , , , := >>3&1, >>2&1, >>1&1, &1 [] = fieldSub(fieldElement(+), fieldElement(+)) [+1] = fieldSub(fieldElement(+), fieldElement(+)) }return}// nttElement is an NTT representation, an element of T_q, represented as an// array according to FIPS 203, Section 2.4.4.type nttElement [n]fieldElement// gammas are the values ζ^2BitRev7(i)+1 mod q for each index i, according to// FIPS 203, Appendix A (with negative values reduced to positive).var gammas = [128]fieldElement{17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, 2110, 1219, 2935, 394, 885, 2444, 2154, 1175}// nttMul multiplies two nttElements.//// It implements MultiplyNTTs, according to FIPS 203, Algorithm 11.func nttMul(, nttElement) nttElement {varnttElement// We use i += 2 for bounds check elimination. 
See https://go.dev/issue/66826.for := 0; < 256; += 2 { , := [], [+1] , := [], [+1] [] = fieldAddMul(, , fieldMul(, ), gammas[/2]) [+1] = fieldAddMul(, , , ) }return}// zetas are the values ζ^BitRev7(k) mod q for each index k, according to FIPS// 203, Appendix A.var zetas = [128]fieldElement{1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154}// ntt maps a ringElement to its nttElement representation.//// It implements NTT, according to FIPS 203, Algorithm 9.func ntt( ringElement) nttElement { := 1for := 128; >= 2; /= 2 {for := 0; < 256; += 2 * { := zetas[] ++// Bounds check elimination hint. , := [:+], [+:++]for := 0; < ; ++ { := fieldMul(, []) [] = fieldSub([], ) [] = fieldAdd([], ) } } }returnnttElement()}// inverseNTT maps a nttElement back to the ringElement it represents.//// It implements NTT⁻¹, according to FIPS 203, Algorithm 10.func inverseNTT( nttElement) ringElement { := 127for := 2; <= 128; *= 2 {for := 0; < 256; += 2 * { := zetas[] --// Bounds check elimination hint. 
, := [:+], [+:++]for := 0; < ; ++ { := [] [] = fieldAdd(, []) [] = fieldMulSub(, [], ) } } }for := range { [] = fieldMul([], 3303) // 3303 = 128⁻¹ mod q }returnringElement()}// sampleNTT draws a uniformly random nttElement from a stream of uniformly// random bytes generated by the XOF function, according to FIPS 203,// Algorithm 7.func sampleNTT( []byte, , byte) nttElement { := sha3.NewShake128() .Write() .Write([]byte{, })// SampleNTT essentially draws 12 bits at a time from r, interprets them in // little-endian, and rejects values higher than q, until it drew 256 // values. (The rejection rate is approximately 19%.) // // To do this from a bytes stream, it draws three bytes at a time, and // splits them into two uint16 appropriately masked. // // r₀ r₁ r₂ // |- - - - - - - -|- - - - - - - -|- - - - - - - -| // // Uint16(r₀ || r₁) // |- - - - - - - - - - - - - - - -| // |- - - - - - - - - - - -| // d₁ // // Uint16(r₁ || r₂) // |- - - - - - - - - - - - - - - -| // |- - - - - - - - - - - -| // d₂ // // Note that in little-endian, the rightmost bits are the most significant // bits (dropped with a mask) and the leftmost bits are the least // significant bits (dropped with a right shift).varnttElementvarint// index into avar [24]byte// buffered reads from B := len() // index into buf, starts in a "buffer fully consumed" statefor {if >= len() { .Read([:]) = 0 } := byteorder.LEUint16([:]) & 0b1111_1111_1111 := byteorder.LEUint16([+1:]) >> 4 += 3if < q { [] = fieldElement() ++ }if >= len() {break }if < q { [] = fieldElement() ++ }if >= len() {break } }return}
// The pages are generated with Golds v0.7.3-preview. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PRs and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.