// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine,!purego

package curve25519

// These functions are implemented in the .s files. The names of the functions
// in the rest of the file are also taken from the SUPERCOP sources to help
// people following along.

// cswap is implemented in assembly. mladder passes it a pointer to row 1 of
// its five-row working state together with a 0/1 flag computed from two
// consecutive scalar bits.
//
// NOTE(review): presumably the constant-time conditional swap of the SUPERCOP
// routine of the same name. The parameter names are assumed to match the
// frame references used in the .s file (go vet's asmdecl check relies on
// them) — confirm against the assembly.
//
//go:noescape
func cswap(inout *[5]uint64, v uint64)

// ladderstep is implemented in assembly. mladder calls it once per processed
// scalar bit, passing a pointer to its whole 5x5-limb working state, which is
// updated in place.
//
// NOTE(review): presumably one differential add-and-double step of the
// Montgomery ladder. The parameter name is assumed to match the frame
// reference used in the .s file — confirm against the assembly.
//
//go:noescape
func ladderstep(inout *[5][5]uint64)

// freeze is implemented in assembly. pack calls it on a private copy of a
// field element immediately before serialising the limbs to bytes.
//
// NOTE(review): presumably reduces the element in place to its unique
// representative modulo 2^255 - 19. The parameter name is assumed to match
// the frame reference used in the .s file — confirm against the assembly.
//
//go:noescape
func freeze(inout *[5]uint64)

// mul is implemented in assembly. The exponent annotations in invert treat it
// as a field multiplication: dest = a * b.
//
// NOTE(review): the parameter names are assumed to match the frame references
// used in the .s file (go vet's asmdecl check relies on them) — confirm
// against the assembly.
//
//go:noescape
func mul(dest, a, b *[5]uint64)

// square is implemented in assembly. The exponent annotations in invert treat
// it as a field squaring: out = in * in.
//
// NOTE(review): the parameter names are assumed to match the frame references
// used in the .s file (go vet's asmdecl check relies on them) — confirm
// against the assembly.
//
//go:noescape
func square(out, in *[5]uint64)

// mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
func mladder(,  *[5]uint64,  *[32]byte) {
	var  [5][5]uint64

	[0] = *
	setint(&[1], 1)
	setint(&[2], 0)
	[3] = *
	setint(&[4], 1)

	 := uint(6)
	var  byte

	for  := 31;  >= 0; -- {
		for  < 8 {
			 := ((*)[] >> ) & 1
			 :=  ^ 
			 = 
			cswap(&[1], uint64())
			ladderstep(&)
			--
		}
		 = 7
	}

	* = [1]
	* = [2]
}

func scalarMult(, ,  *[32]byte) {
	var  [32]byte
	copy([:], (*)[:])
	[0] &= 248
	[31] &= 127
	[31] |= 64

	var ,  [5]uint64
	unpack(&, )
	mladder(&, &, &)
	invert(&, &)
	mul(&, &, &)
	pack(, &)
}

// setint sets r to the small integer v: the lowest limb receives v and the
// remaining four limbs are cleared.
func setint(r *[5]uint64, v uint64) {
	r[0] = v
	r[1] = 0
	r[2] = 0
	r[3] = 0
	r[4] = 0
}

// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
// order.
//
// x is read as a little-endian byte string. Limb k receives bits
// 51*k .. 51*k+50 of x, so a byte that straddles a limb boundary contributes
// its low bits to one limb and its high bits to the next. The top bit of
// x[31] (bit 255) is discarded.
func unpack(r *[5]uint64, x *[32]byte) {
	// Bits 0..50: bytes 0-5 and the low 3 bits of byte 6.
	r[0] = uint64(x[0]) |
		uint64(x[1])<<8 |
		uint64(x[2])<<16 |
		uint64(x[3])<<24 |
		uint64(x[4])<<32 |
		uint64(x[5])<<40 |
		uint64(x[6]&7)<<48

	// Bits 51..101: the high 5 bits of byte 6, bytes 7-11 and the low 6 bits
	// of byte 12.
	r[1] = uint64(x[6])>>3 |
		uint64(x[7])<<5 |
		uint64(x[8])<<13 |
		uint64(x[9])<<21 |
		uint64(x[10])<<29 |
		uint64(x[11])<<37 |
		uint64(x[12]&63)<<45

	// Bits 102..152: the high 2 bits of byte 12, bytes 13-18 and the low bit
	// of byte 19.
	r[2] = uint64(x[12])>>6 |
		uint64(x[13])<<2 |
		uint64(x[14])<<10 |
		uint64(x[15])<<18 |
		uint64(x[16])<<26 |
		uint64(x[17])<<34 |
		uint64(x[18])<<42 |
		uint64(x[19]&1)<<50

	// Bits 153..203: the high 7 bits of byte 19, bytes 20-24 and the low 4
	// bits of byte 25.
	r[3] = uint64(x[19])>>1 |
		uint64(x[20])<<7 |
		uint64(x[21])<<15 |
		uint64(x[22])<<23 |
		uint64(x[23])<<31 |
		uint64(x[24])<<39 |
		uint64(x[25]&15)<<47

	// Bits 204..254: the high 4 bits of byte 25, bytes 26-30 and the low 7
	// bits of byte 31.
	r[4] = uint64(x[25])>>4 |
		uint64(x[26])<<4 |
		uint64(x[27])<<12 |
		uint64(x[28])<<20 |
		uint64(x[29])<<28 |
		uint64(x[30])<<36 |
		uint64(x[31]&127)<<44
}

// pack sets out = x where out is the usual, little-endian form of the 5,
// 51-bit limbs in x.
func pack( *[32]byte,  *[5]uint64) {
	 := *
	freeze(&)

	[0] = byte([0])
	[1] = byte([0] >> 8)
	[2] = byte([0] >> 16)
	[3] = byte([0] >> 24)
	[4] = byte([0] >> 32)
	[5] = byte([0] >> 40)
	[6] = byte([0] >> 48)

	[6] ^= byte([1]<<3) & 0xf8
	[7] = byte([1] >> 5)
	[8] = byte([1] >> 13)
	[9] = byte([1] >> 21)
	[10] = byte([1] >> 29)
	[11] = byte([1] >> 37)
	[12] = byte([1] >> 45)

	[12] ^= byte([2]<<6) & 0xc0
	[13] = byte([2] >> 2)
	[14] = byte([2] >> 10)
	[15] = byte([2] >> 18)
	[16] = byte([2] >> 26)
	[17] = byte([2] >> 34)
	[18] = byte([2] >> 42)
	[19] = byte([2] >> 50)

	[19] ^= byte([3]<<1) & 0xfe
	[20] = byte([3] >> 7)
	[21] = byte([3] >> 15)
	[22] = byte([3] >> 23)
	[23] = byte([3] >> 31)
	[24] = byte([3] >> 39)
	[25] = byte([3] >> 47)

	[25] ^= byte([4]<<4) & 0xf0
	[26] = byte([4] >> 4)
	[27] = byte([4] >> 12)
	[28] = byte([4] >> 20)
	[29] = byte([4] >> 28)
	[30] = byte([4] >> 36)
	[31] = byte([4] >> 44)
}

// invert calculates r = x^-1 mod p using Fermat's little theorem.
func invert( *[5]uint64,  *[5]uint64) {
	var , , , , , , , ,  [5]uint64

	square(&, )        /* 2 */
	square(&, &)       /* 4 */
	square(&, &)        /* 8 */
	mul(&, &, )       /* 9 */
	mul(&, &, &)   /* 11 */
	square(&, &)      /* 22 */
	mul(&, &, &) /* 2^5 - 2^0 = 31 */

	square(&, &)      /* 2^6 - 2^1 */
	for  := 1;  < 5; ++ { /* 2^20 - 2^10 */
		square(&, &)
	}
	mul(&, &, &) /* 2^10 - 2^0 */

	square(&, &)      /* 2^11 - 2^1 */
	for  := 1;  < 10; ++ { /* 2^20 - 2^10 */
		square(&, &)
	}
	mul(&, &, &) /* 2^20 - 2^0 */

	square(&, &)      /* 2^21 - 2^1 */
	for  := 1;  < 20; ++ { /* 2^40 - 2^20 */
		square(&, &)
	}
	mul(&, &, &) /* 2^40 - 2^0 */

	square(&, &)            /* 2^41 - 2^1 */
	for  := 1;  < 10; ++ { /* 2^50 - 2^10 */
		square(&, &)
	}
	mul(&, &, &) /* 2^50 - 2^0 */

	square(&, &)      /* 2^51 - 2^1 */
	for  := 1;  < 50; ++ { /* 2^100 - 2^50 */
		square(&, &)
	}
	mul(&, &, &) /* 2^100 - 2^0 */

	square(&, &)      /* 2^101 - 2^1 */
	for  := 1;  < 100; ++ { /* 2^200 - 2^100 */
		square(&, &)
	}
	mul(&, &, &) /* 2^200 - 2^0 */

	square(&, &)            /* 2^201 - 2^1 */
	for  := 1;  < 50; ++ { /* 2^250 - 2^50 */
		square(&, &)
	}
	mul(&, &, &) /* 2^250 - 2^0 */

	square(&, &) /* 2^251 - 2^1 */
	square(&, &) /* 2^252 - 2^2 */
	square(&, &) /* 2^253 - 2^3 */

	square(&, &) /* 2^254 - 2^4 */

	square(&, &)   /* 2^255 - 2^5 */
	mul(, &, &) /* 2^255 - 21 */
}