// Code generated by command: go run gcm_amd64_asm.go -out ../../gcm_amd64.s -pkg aes. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
// func gcmAesFinish(productTable *[256]byte, tagMask *[16]byte, T *[16]byte, pLen uint64, dLen uint64)
// Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesFinish completes the value held at T, in place:
//   1. pLen and dLen are multiplied by 8 and packed into one 128-bit block
//      (pLen in the low quadword, dLen in the high quadword);
//   2. that block is XORed with the 16 bytes currently stored at T;
//   3. the result is carry-less multiplied by the productTable entry at byte
//      offset 16*14 (using the companion entry at 16*15 for the Karatsuba
//      middle term) and folded twice with gcmPoly;
//   4. the product is byte-reversed with bswapMask, XORed with tagMask and
//      written back to T.
// The routine uses no stack and writes no memory other than T.
#define tblPtr  DI
#define maskPtr SI
#define tagPtr  DX
#define pBits   AX
#define dBits   CX
#define lenBlk  X0
#define prodLo  X8
#define prodHi  X9
#define prodMid X10
#define scratch X11
#define maskVec X13
#define polyVec X14
#define swapVec X15
#define foldStep(acc) \
	MOVOU     polyVec, scratch; \
	PCLMULQDQ $0x01, acc, scratch; \
	PSHUFD    $78, acc, acc; \
	PXOR      scratch, acc
TEXT ·gcmAesFinish(SB), NOSPLIT, $0-40
	MOVQ productTable+0(FP), tblPtr
	MOVQ tagMask+8(FP), maskPtr
	MOVQ T+16(FP), tagPtr
	MOVQ pLen+24(FP), pBits
	MOVQ dLen+32(FP), dBits
	MOVOU (tagPtr), prodLo
	MOVOU (maskPtr), maskVec
	MOVOU bswapMask<>(SB), swapVec
	MOVOU gcmPoly<>(SB), polyVec
	// Scale both lengths by 8 and pack them into a single block.
	SHLQ $3, pBits
	SHLQ $3, dBits
	MOVQ pBits, lenBlk
	PINSRQ $1, dBits, lenBlk
	PXOR prodLo, lenBlk
	// Three carry-less multiplies: low halves, high halves, and the XOR of halves.
	MOVOU (16*14)(tblPtr), prodLo
	MOVOU (16*15)(tblPtr), prodMid
	MOVOU prodLo, prodHi
	PCLMULQDQ $0x00, lenBlk, prodLo
	PCLMULQDQ $0x11, lenBlk, prodHi
	PSHUFD $78, lenBlk, scratch
	PXOR lenBlk, scratch
	PCLMULQDQ $0x00, scratch, prodMid
	// Recover the true middle term and split it across the low and high halves.
	PXOR prodLo, prodMid
	PXOR prodHi, prodMid
	MOVOU prodMid, scratch
	PSRLDQ $8, prodMid
	PSLLDQ $8, scratch
	PXOR prodMid, prodHi
	PXOR scratch, prodLo
	// Two folding steps bring the 256-bit product back to 128 bits.
	foldStep(prodLo)
	foldStep(prodLo)
	PXOR prodHi, prodLo
	// Reverse the byte order, apply the mask, and store the result over T.
	PSHUFB swapVec, prodLo
	PXOR maskVec, prodLo
	MOVOU prodLo, (tagPtr)
	RET
#undef foldStep
#undef swapVec
#undef polyVec
#undef maskVec
#undef scratch
#undef prodMid
#undef prodHi
#undef prodLo
#undef lenBlk
#undef dBits
#undef pBits
#undef tagPtr
#undef maskPtr
#undef tblPtr
// bswapMask is a 16-byte PSHUFB control vector. Laid out in memory its bytes
// are 0x0f, 0x0e, ..., 0x01, 0x00, so shuffling with it reverses the byte
// order of a 128-bit register.
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
// gcmPoly is the 128-bit constant used by every reduction (folding) step in
// this file: low quadword 1, high quadword 0xc2 shifted into the top byte.
// NOTE(review): presumably the bit-reflected GHASH reduction polynomial; confirm
// against the generator source.
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000001
DATA gcmPoly<>+0x08(SB)/8, $0xc200000000000000
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
// func gcmAesInit(productTable *[256]byte, ks []uint32)
// Requires: AES, PCLMULQDQ, SSE2, SSSE3
//
// gcmAesInit fills the 256-byte productTable from the expanded key schedule ks.
//
// The hash key H is the AES encryption of the all-zero block. The number of
// rounds is len(ks)/4 - 1: fewer than 12 stops after round key 10, exactly 12
// after round key 12, anything larger after round key 14.
//
// Table layout on return (sixteen 16-byte entries, written from the top down):
//   offset 16*14: H*2 (H shifted left one bit, gcmPoly folded in on carry-out)
//   offset 16*15: XOR of the two 64-bit halves of that value (Karatsuba operand)
//   each lower pair (16*12/16*13, 16*10/16*11, ..., 0/16) holds the previous
//   pair's value multiplied once more by H*2, followed by its own half-XOR.
// The routine uses no stack.
#define tblPtr   DI
#define keyPtr   SI
#define rounds   DX
#define loopCnt  AX
#define hkey     X0
#define hkeyFold X1
#define pw       X2
#define pwFold   X3
#define spill    X4
#define t0       X11
#define t1       X12
#define t2       X13
#define polyVec  X14
#define swapVec  X15
#define encRound(i) \
	MOVOU  (16*i)(keyPtr), t0; \
	AESENC t0, hkey
TEXT ·gcmAesInit(SB), NOSPLIT, $0-32
	MOVQ productTable+0(FP), tblPtr
	MOVQ ks_base+8(FP), keyPtr
	MOVQ ks_len+16(FP), rounds
	SHRQ $2, rounds
	DECQ rounds
	MOVOU bswapMask<>(SB), swapVec
	MOVOU gcmPoly<>(SB), polyVec
	// Encrypting an all-zero block: after the initial key addition the state
	// is simply round key 0, so start from it directly.
	MOVOU (16*0)(keyPtr), hkey
	encRound(1)
	encRound(2)
	encRound(3)
	encRound(4)
	encRound(5)
	encRound(6)
	encRound(7)
	encRound(8)
	encRound(9)
	MOVOU (16*10)(keyPtr), t0
	// The flags set here are consumed by both JB and the later JE; none of
	// the instructions in between touch them.
	CMPQ rounds, $12
	JB initEncLast
	AESENC t0, hkey
	encRound(11)
	MOVOU (16*12)(keyPtr), t0
	JE initEncLast
	AESENC t0, hkey
	encRound(13)
	MOVOU (16*14)(keyPtr), t0
initEncLast:
	AESENCLAST t0, hkey
	PSHUFB swapVec, hkey
	// Double H: shift the 128-bit value left by one bit. t0 becomes an
	// all-ones mask when the top bit was set (selecting gcmPoly), t1 carries
	// the bit shifted out of each 32-bit lane into the lane above.
	PSHUFD $0xff, hkey, t0
	MOVOU hkey, t1
	PSRAL $31, t0
	PAND polyVec, t0
	PSRLL $31, t1
	PSLLDQ $4, t1
	PSLLL $1, hkey
	PXOR t0, hkey
	PXOR t1, hkey
	// Store the base multiplier and its Karatsuba half-XOR in the top pair.
	MOVOU hkey, (16*14)(tblPtr)
	PSHUFD $78, hkey, hkeyFold
	PXOR hkey, hkeyFold
	MOVOU hkeyFold, (16*15)(tblPtr)
	MOVOU hkey, pw
	MOVOU hkeyFold, pwFold
	// Seven more entries: each pass multiplies the running power by the base,
	// stores it one pair lower, and steps the table pointer down 32 bytes.
	MOVQ $7, loopCnt
initLoop:
	MOVOU pw, t0
	MOVOU pw, t1
	MOVOU pwFold, t2
	PCLMULQDQ $0x00, hkey, t0
	PCLMULQDQ $0x11, hkey, t1
	PCLMULQDQ $0x00, hkeyFold, t2
	PXOR t0, t2
	PXOR t1, t2
	MOVOU t2, spill
	PSLLDQ $8, spill
	PSRLDQ $8, t2
	PXOR spill, t0
	PXOR t2, t1
	MOVOU polyVec, pw
	PCLMULQDQ $0x01, t0, pw
	PSHUFD $78, t0, t0
	PXOR pw, t0
	MOVOU polyVec, pw
	PCLMULQDQ $0x01, t0, pw
	PSHUFD $78, t0, t0
	PXOR t0, pw
	PXOR t1, pw
	MOVOU pw, (16*12)(tblPtr)
	PSHUFD $78, pw, pwFold
	PXOR pw, pwFold
	MOVOU pwFold, (16*13)(tblPtr)
	// LEAQ leaves the flags from DECQ intact for the JNE below.
	DECQ loopCnt
	LEAQ (-16*2)(tblPtr), tblPtr
	JNE initLoop
	RET
#undef encRound
#undef swapVec
#undef polyVec
#undef t2
#undef t1
#undef t0
#undef spill
#undef pwFold
#undef pw
#undef hkeyFold
#undef hkey
#undef loopCnt
#undef rounds
#undef keyPtr
#undef tblPtr
// func gcmAesData(productTable *[256]byte, data []byte, T *[16]byte)
// Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesData hashes data with the multipliers stored in productTable and
// stores the 16-byte result at T. The accumulator starts from zero; the
// previous contents of T are never read. Empty data therefore writes zero.
//
// Register use:
//   DI = productTable, SI = read cursor in data, DX = bytes left, CX = T
//   X8 = accumulator / low product, X9 = high product, X10 = middle product
//   X12, X13 = table entries 224(DI) and 240(DI) on the one-block paths
//   X14 = gcmPoly, X15 = bswapMask
TEXT ·gcmAesData(SB), NOSPLIT, $0-40
MOVQ productTable+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
MOVQ T+32(FP), CX
// Zero the accumulator.
PXOR X8, X8
MOVOU bswapMask<>+0(SB), X15
MOVOU gcmPoly<>+0(SB), X14
// Dispatch on length: empty, exactly 13 bytes, shorter than 128, or 128 and more.
TESTQ DX, DX
JEQ dataBail
CMPQ DX, $0x0d
JE dataTLS
CMPQ DX, $0x80
JB startSinglesLoop
JMP dataOctaLoop
// 13-byte fast path: assemble the block from 8 + 4 + 1 byte loads so nothing
// past the end of data is read, then clear DX so the loop exits after one
// multiplication. NOTE(review): the label name suggests this targets the TLS
// additional-data size; confirm against the generator source.
dataTLS:
MOVOU 224(DI), X12
MOVOU 240(DI), X13
PXOR X0, X0
MOVQ (SI), X0
PINSRD $0x02, 8(SI), X0
PINSRB $0x0c, 12(SI), X0
XORQ DX, DX
JMP dataMul
// Bulk path: 128 bytes (eight blocks) per iteration.
dataOctaLoop:
CMPQ DX, $0x80
JB startSinglesLoop
SUBQ $0x80, DX
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
LEAQ 128(SI), SI
// Byte-reverse every block.
PSHUFB X15, X0
PSHUFB X15, X1
PSHUFB X15, X2
PSHUFB X15, X3
PSHUFB X15, X4
PSHUFB X15, X5
PSHUFB X15, X6
PSHUFB X15, X7
// Fold the running accumulator into the first block only.
PXOR X8, X0
// Block 0 times table pair 0/16: starts the low (X8), high (X9) and
// middle (X10) partial products.
MOVOU (DI), X8
MOVOU 16(DI), X10
MOVOU X8, X9
PSHUFD $0x4e, X0, X12
PXOR X0, X12
PCLMULQDQ $0x00, X0, X8
PCLMULQDQ $0x11, X0, X9
PCLMULQDQ $0x00, X12, X10
// Blocks 1..7: block i is multiplied by table pair 32*i / 32*i+16 and the
// three partial products are XOR-accumulated; reduction is deferred.
MOVOU 32(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X1, X12
PXOR X12, X8
PCLMULQDQ $0x11, X1, X13
PXOR X13, X9
PSHUFD $0x4e, X1, X12
PXOR X12, X1
MOVOU 48(DI), X12
PCLMULQDQ $0x00, X1, X12
PXOR X12, X10
MOVOU 64(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X2, X12
PXOR X12, X8
PCLMULQDQ $0x11, X2, X13
PXOR X13, X9
PSHUFD $0x4e, X2, X12
PXOR X12, X2
MOVOU 80(DI), X12
PCLMULQDQ $0x00, X2, X12
PXOR X12, X10
MOVOU 96(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X3, X12
PXOR X12, X8
PCLMULQDQ $0x11, X3, X13
PXOR X13, X9
PSHUFD $0x4e, X3, X12
PXOR X12, X3
MOVOU 112(DI), X12
PCLMULQDQ $0x00, X3, X12
PXOR X12, X10
MOVOU 128(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X4, X12
PXOR X12, X8
PCLMULQDQ $0x11, X4, X13
PXOR X13, X9
PSHUFD $0x4e, X4, X12
PXOR X12, X4
MOVOU 144(DI), X12
PCLMULQDQ $0x00, X4, X12
PXOR X12, X10
MOVOU 160(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X5, X12
PXOR X12, X8
PCLMULQDQ $0x11, X5, X13
PXOR X13, X9
PSHUFD $0x4e, X5, X12
PXOR X12, X5
MOVOU 176(DI), X12
PCLMULQDQ $0x00, X5, X12
PXOR X12, X10
MOVOU 192(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X6, X12
PXOR X12, X8
PCLMULQDQ $0x11, X6, X13
PXOR X13, X9
PSHUFD $0x4e, X6, X12
PXOR X12, X6
MOVOU 208(DI), X12
PCLMULQDQ $0x00, X6, X12
PXOR X12, X10
MOVOU 224(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X7, X12
PXOR X12, X8
PCLMULQDQ $0x11, X7, X13
PXOR X13, X9
PSHUFD $0x4e, X7, X12
PXOR X12, X7
MOVOU 240(DI), X12
PCLMULQDQ $0x00, X7, X12
PXOR X12, X10
// Recover the true middle term, split it across the low and high halves, then
// fold twice with gcmPoly to reduce the sum back to 128 bits in X8.
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
JMP dataOctaLoop
// One-block path: keep the last table pair in X12/X13 for the whole loop.
startSinglesLoop:
MOVOU 224(DI), X12
MOVOU 240(DI), X13
dataSinglesLoop:
CMPQ DX, $0x10
JB dataEnd
SUBQ $0x10, DX
MOVOU (SI), X0
// dataMul is also entered from dataTLS and from dataLoadLoop with X0 already
// holding the (zero-padded) block and DX == 0.
dataMul:
PSHUFB X15, X0
PXOR X8, X0
MOVOU X12, X8
MOVOU X13, X10
MOVOU X12, X9
PSHUFD $0x4e, X0, X11
PXOR X0, X11
PCLMULQDQ $0x00, X0, X8
PCLMULQDQ $0x11, X0, X9
PCLMULQDQ $0x00, X11, X10
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
LEAQ 16(SI), SI
JMP dataSinglesLoop
// 1..15 bytes remain: build the final block one byte at a time, walking
// backwards from the last byte, so the unused high bytes of X0 stay zero.
dataEnd:
TESTQ DX, DX
JEQ dataBail
PXOR X0, X0
LEAQ -1(SI)(DX*1), SI
dataLoadLoop:
PSLLDQ $0x01, X0
PINSRB $0x00, (SI), X0
LEAQ -1(SI), SI
DECQ DX
JNE dataLoadLoop
JMP dataMul
// Store the accumulator at T.
dataBail:
MOVOU X8, (CX)
RET
// func gcmAesEnc(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
// Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesEnc encrypts len(src) bytes from src into dst in counter mode and
// folds the produced ciphertext into the hash value at T (read on entry,
// written on exit). The memory behind ctr is only read.
//
// Register use:
//   DI = productTable, DX = dst cursor, SI = src cursor, R9 = bytes left
//   CX = ctr, R8 = T, AX = ks, R13 = len(ks)/4 - 1 (selects the round count)
//   R10 = running 32-bit counter in host byte order
//   R12 = byte-swapped last word of round key 0, R11 = scratch
//   X8/X9/X10 = low/high/middle hash products (X8 is the accumulator)
//   X14 = gcmPoly, X15 = bswapMask
//
// Stack frame (256 bytes):
//   0..127(SP)   eight byte-reversed ciphertext blocks waiting to be hashed
//   128..255(SP) eight counter blocks, already XORed with round key 0
TEXT ·gcmAesEnc(SB), $256-96
MOVQ productTable+0(FP), DI
MOVQ dst_base+8(FP), DX
MOVQ src_base+32(FP), SI
MOVQ src_len+40(FP), R9
MOVQ ctr+56(FP), CX
MOVQ T+64(FP), R8
MOVQ ks_base+72(FP), AX
MOVQ ks_len+80(FP), R13
SHRQ $0x02, R13
DECQ R13
MOVOU bswapMask<>+0(SB), X15
MOVOU gcmPoly<>+0(SB), X14
MOVOU (R8), X8
PXOR X9, X9
PXOR X10, X10
// Counter setup. X11 = counter block XOR round key 0 is the template for every
// slot. Only the last 32-bit word changes between blocks, so each new value is
// produced in integer registers as bswap((counter+1) XOR bswap(key word)) and
// patched over the slot's last dword. The counter is incremented before its
// first use, so the incoming counter value itself never encrypts data here.
MOVOU (CX), X0
MOVL 12(CX), R10
MOVOU (AX), X11
MOVL 12(AX), R12
BSWAPL R10
BSWAPL R12
PXOR X0, X11
MOVOU X11, 128(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 140(SP)
// Fewer than 128 bytes: skip the eight-block pipeline entirely.
CMPQ R9, $0x80
JB gcmAesEncSingles
SUBQ $0x80, R9
// We have at least 8 blocks to encrypt, prepare the rest of the counters
MOVOU X11, 144(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 156(SP)
MOVOU X11, 160(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 172(SP)
MOVOU X11, 176(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 188(SP)
MOVOU X11, 192(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 204(SP)
MOVOU X11, 208(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 220(SP)
MOVOU X11, 224(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 236(SP)
MOVOU X11, 240(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 252(SP)
// First batch: pull the eight counter blocks into X0..X7 and run rounds 1..9.
// After each of the first eight rounds one stack slot is refreshed with the
// counter for the following batch.
MOVOU 128(SP), X0
MOVOU 144(SP), X1
MOVOU 160(SP), X2
MOVOU 176(SP), X3
MOVOU 192(SP), X4
MOVOU 208(SP), X5
MOVOU 224(SP), X6
MOVOU 240(SP), X7
MOVOU 16(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 140(SP)
MOVOU 32(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 156(SP)
MOVOU 48(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 172(SP)
MOVOU 64(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 188(SP)
MOVOU 80(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 204(SP)
MOVOU 96(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 220(SP)
MOVOU 112(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 236(SP)
MOVOU 128(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 252(SP)
MOVOU 144(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
// Round-count dispatch: R13 < 12 finishes with round key 10, R13 == 12 with
// round key 12, otherwise round key 14. The JE below reuses the flags from
// this CMPQ; AESENC and MOVOU do not modify them.
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB encLast1
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 176(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 192(AX), X11
JE encLast1
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 208(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 224(AX), X11
encLast1:
AESENCLAST X11, X0
AESENCLAST X11, X1
AESENCLAST X11, X2
AESENCLAST X11, X3
AESENCLAST X11, X4
AESENCLAST X11, X5
AESENCLAST X11, X6
AESENCLAST X11, X7
// XOR the keystream with 128 bytes of src.
MOVOU (SI), X11
PXOR X11, X0
MOVOU 16(SI), X11
PXOR X11, X1
MOVOU 32(SI), X11
PXOR X11, X2
MOVOU 48(SI), X11
PXOR X11, X3
MOVOU 64(SI), X11
PXOR X11, X4
MOVOU 80(SI), X11
PXOR X11, X5
MOVOU 96(SI), X11
PXOR X11, X6
MOVOU 112(SI), X11
PXOR X11, X7
// Write the ciphertext to dst, then byte-reverse each block for hashing. The
// running accumulator is folded into the first block only.
MOVOU X0, (DX)
PSHUFB X15, X0
PXOR X8, X0
MOVOU X1, 16(DX)
PSHUFB X15, X1
MOVOU X2, 32(DX)
PSHUFB X15, X2
MOVOU X3, 48(DX)
PSHUFB X15, X3
MOVOU X4, 64(DX)
PSHUFB X15, X4
MOVOU X5, 80(DX)
PSHUFB X15, X5
MOVOU X6, 96(DX)
PSHUFB X15, X6
MOVOU X7, 112(DX)
PSHUFB X15, X7
// Park the eight blocks on the stack; they are hashed while the next batch
// is being encrypted (or in gcmAesEncOctetsEnd if there is no next batch).
MOVOU X0, (SP)
MOVOU X1, 16(SP)
MOVOU X2, 32(SP)
MOVOU X3, 48(SP)
MOVOU X4, 64(SP)
MOVOU X5, 80(SP)
MOVOU X6, 96(SP)
MOVOU X7, 112(SP)
LEAQ 128(SI), SI
LEAQ 128(DX), DX
// Steady state: every iteration encrypts eight fresh counter blocks and, in
// between the AES rounds, hashes the eight ciphertext blocks parked by the
// previous iteration.
gcmAesEncOctetsLoop:
CMPQ R9, $0x80
JB gcmAesEncOctetsEnd
SUBQ $0x80, R9
MOVOU 128(SP), X0
MOVOU 144(SP), X1
MOVOU 160(SP), X2
MOVOU 176(SP), X3
MOVOU 192(SP), X4
MOVOU 208(SP), X5
MOVOU 224(SP), X6
MOVOU 240(SP), X7
// Parked block 0 times table pair 0/16 starts the three partial products.
MOVOU (SP), X11
PSHUFD $0x4e, X11, X12
PXOR X11, X12
MOVOU (DI), X8
MOVOU 16(DI), X10
MOVOU X8, X9
PCLMULQDQ $0x00, X12, X10
PCLMULQDQ $0x00, X11, X8
PCLMULQDQ $0x11, X11, X9
// Combined rounds 1..7: AES round i on all eight blocks, parked block i
// multiplied by table pair 32*i / 32*i+16, and counter slot i-1 refreshed.
MOVOU 16(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 32(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 16(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 48(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 140(SP)
MOVOU 32(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 64(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 32(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 80(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 156(SP)
MOVOU 48(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 96(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 48(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 112(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 172(SP)
MOVOU 64(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 128(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 64(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 144(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 188(SP)
MOVOU 80(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 160(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 80(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 176(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 204(SP)
MOVOU 96(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 192(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 96(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 208(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 220(SP)
MOVOU 112(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 224(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 112(SP), X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 240(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 236(SP)
// Round 8 is a plain AES round, followed by the refresh of the last slot.
MOVOU 128(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 252(SP)
// Merge the middle product into low/high, then reduce in two folding steps
// with AES round 9 scheduled between them.
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU 144(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
// Same round-count dispatch as before encLast1.
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB encLast2
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 176(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 192(AX), X11
JE encLast2
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 208(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 224(AX), X11
encLast2:
AESENCLAST X11, X0
AESENCLAST X11, X1
AESENCLAST X11, X2
AESENCLAST X11, X3
AESENCLAST X11, X4
AESENCLAST X11, X5
AESENCLAST X11, X6
AESENCLAST X11, X7
// XOR with src, store to dst, byte-reverse, fold the accumulator into the
// first block and park all eight for the next pass.
MOVOU (SI), X11
PXOR X11, X0
MOVOU 16(SI), X11
PXOR X11, X1
MOVOU 32(SI), X11
PXOR X11, X2
MOVOU 48(SI), X11
PXOR X11, X3
MOVOU 64(SI), X11
PXOR X11, X4
MOVOU 80(SI), X11
PXOR X11, X5
MOVOU 96(SI), X11
PXOR X11, X6
MOVOU 112(SI), X11
PXOR X11, X7
MOVOU X0, (DX)
PSHUFB X15, X0
PXOR X8, X0
MOVOU X1, 16(DX)
PSHUFB X15, X1
MOVOU X2, 32(DX)
PSHUFB X15, X2
MOVOU X3, 48(DX)
PSHUFB X15, X3
MOVOU X4, 64(DX)
PSHUFB X15, X4
MOVOU X5, 80(DX)
PSHUFB X15, X5
MOVOU X6, 96(DX)
PSHUFB X15, X6
MOVOU X7, 112(DX)
PSHUFB X15, X7
MOVOU X0, (SP)
MOVOU X1, 16(SP)
MOVOU X2, 32(SP)
MOVOU X3, 48(SP)
MOVOU X4, 64(SP)
MOVOU X5, 80(SP)
MOVOU X6, 96(SP)
MOVOU X7, 112(SP)
LEAQ 128(SI), SI
LEAQ 128(DX), DX
JMP gcmAesEncOctetsLoop
// Drain: hash the eight parked blocks without any interleaved encryption.
gcmAesEncOctetsEnd:
MOVOU (SP), X11
MOVOU (DI), X8
MOVOU 16(DI), X10
MOVOU X8, X9
PSHUFD $0x4e, X11, X12
PXOR X11, X12
PCLMULQDQ $0x00, X11, X8
PCLMULQDQ $0x11, X11, X9
PCLMULQDQ $0x00, X12, X10
MOVOU 16(SP), X11
MOVOU 32(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 48(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
MOVOU 32(SP), X11
MOVOU 64(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 80(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
MOVOU 48(SP), X11
MOVOU 96(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 112(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
MOVOU 64(SP), X11
MOVOU 128(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 144(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
MOVOU 80(SP), X11
MOVOU 160(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 176(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
MOVOU 96(SP), X11
MOVOU 192(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 208(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
MOVOU 112(SP), X11
MOVOU 224(DI), X12
MOVOU X12, X13
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PCLMULQDQ $0x11, X11, X13
PXOR X13, X9
PSHUFD $0x4e, X11, X12
PXOR X12, X11
MOVOU 240(DI), X12
PCLMULQDQ $0x00, X11, X12
PXOR X12, X10
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
TESTQ R9, R9
JE gcmAesEncDone
// R10 currently matches counter slot 7; the code below only uses slot 0,
// whose value is seven lower, so rewind before continuing to increment.
SUBQ $0x07, R10
// One block at a time. Round keys 1..7 stay resident in X1..X7 and the last
// table entry in X13; the remaining round keys are reloaded per block.
gcmAesEncSingles:
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 224(DI), X13
gcmAesEncSinglesLoop:
CMPQ R9, $0x10
JB gcmAesEncTail
SUBQ $0x10, R9
// Take the current counter block, then immediately prepare the next one.
MOVOU 128(SP), X0
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 140(SP)
AESENC X1, X0
AESENC X2, X0
AESENC X3, X0
AESENC X4, X0
AESENC X5, X0
AESENC X6, X0
AESENC X7, X0
MOVOU 128(AX), X11
AESENC X11, X0
MOVOU 144(AX), X11
AESENC X11, X0
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB encLast3
AESENC X11, X0
MOVOU 176(AX), X11
AESENC X11, X0
MOVOU 192(AX), X11
JE encLast3
AESENC X11, X0
MOVOU 208(AX), X11
AESENC X11, X0
MOVOU 224(AX), X11
encLast3:
AESENCLAST X11, X0
// Encrypt one block, store it, and hash it with table pair 224/240.
MOVOU (SI), X11
PXOR X11, X0
MOVOU X0, (DX)
PSHUFB X15, X0
PXOR X8, X0
MOVOU X13, X8
MOVOU X13, X9
MOVOU 240(DI), X10
PSHUFD $0x4e, X0, X11
PXOR X0, X11
PCLMULQDQ $0x00, X0, X8
PCLMULQDQ $0x11, X0, X9
PCLMULQDQ $0x00, X11, X10
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
LEAQ 16(SI), SI
LEAQ 16(DX), DX
JMP gcmAesEncSinglesLoop
// Final partial block (1..15 bytes), if any.
gcmAesEncTail:
TESTQ R9, R9
JE gcmAesEncDone
MOVOU 128(SP), X0
AESENC X1, X0
AESENC X2, X0
AESENC X3, X0
AESENC X4, X0
AESENC X5, X0
AESENC X6, X0
AESENC X7, X0
MOVOU 128(AX), X11
AESENC X11, X0
MOVOU 144(AX), X11
AESENC X11, X0
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB encLast4
AESENC X11, X0
MOVOU 176(AX), X11
AESENC X11, X0
MOVOU 192(AX), X11
JE encLast4
AESENC X11, X0
MOVOU 208(AX), X11
AESENC X11, X0
MOVOU 224(AX), X11
encLast4:
AESENCLAST X11, X0
// Keep the keystream in X11. X12 = andMask entry number R9 (low R9 bytes
// set). R10 is reused as the table base here; the counter is no longer needed.
MOVOU X0, X11
LEAQ -1(SI)(R9*1), SI
MOVQ R9, R11
SHLQ $0x04, R11
LEAQ andMask<>+0(SB), R10
MOVOU -16(R10)(R11*1), X12
PXOR X0, X0
// Gather the remaining source bytes one at a time, last byte first, so no
// byte beyond the end of src is read.
ptxLoadLoop:
PSLLDQ $0x01, X0
PINSRB $0x00, (SI), X0
LEAQ -1(SI), SI
DECQ R9
JNE ptxLoadLoop
PXOR X11, X0
PAND X12, X0
// A full 16 bytes are written even though fewer are meaningful.
// NOTE(review): this relies on dst having at least 16 writable bytes at this
// position (presumably room reserved for the tag) - confirm against callers.
MOVOU X0, (DX)
// Hash the zero-padded final block.
PSHUFB X15, X0
PXOR X8, X0
MOVOU X13, X8
MOVOU X13, X9
MOVOU 240(DI), X10
PSHUFD $0x4e, X0, X11
PXOR X0, X11
PCLMULQDQ $0x00, X0, X8
PCLMULQDQ $0x11, X0, X9
PCLMULQDQ $0x00, X11, X10
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
// Write the updated hash value back to T.
gcmAesEncDone:
MOVOU X8, (R8)
RET
// andMask holds fifteen 16-byte masks. The mask at byte offset 16*(n-1) has its
// low n bytes set to 0xff and the rest clear, for n = 1..15. The tail code
// indexes it as -16(base)(n*16) to keep only the first n bytes of a block.
// n = 1..8: only the low quadword is partially set.
DATA andMask<>+0x00(SB)/8, $0x00000000000000ff
DATA andMask<>+0x08(SB)/8, $0x0000000000000000
DATA andMask<>+0x10(SB)/8, $0x000000000000ffff
DATA andMask<>+0x18(SB)/8, $0x0000000000000000
DATA andMask<>+0x20(SB)/8, $0x0000000000ffffff
DATA andMask<>+0x28(SB)/8, $0x0000000000000000
DATA andMask<>+0x30(SB)/8, $0x00000000ffffffff
DATA andMask<>+0x38(SB)/8, $0x0000000000000000
DATA andMask<>+0x40(SB)/8, $0x000000ffffffffff
DATA andMask<>+0x48(SB)/8, $0x0000000000000000
DATA andMask<>+0x50(SB)/8, $0x0000ffffffffffff
DATA andMask<>+0x58(SB)/8, $0x0000000000000000
DATA andMask<>+0x60(SB)/8, $0x00ffffffffffffff
DATA andMask<>+0x68(SB)/8, $0x0000000000000000
DATA andMask<>+0x70(SB)/8, $0xffffffffffffffff
DATA andMask<>+0x78(SB)/8, $0x0000000000000000
// n = 9..15: the low quadword is all ones, the high quadword is partially set.
DATA andMask<>+0x80(SB)/8, $0xffffffffffffffff
DATA andMask<>+0x88(SB)/8, $0x00000000000000ff
DATA andMask<>+0x90(SB)/8, $0xffffffffffffffff
DATA andMask<>+0x98(SB)/8, $0x000000000000ffff
DATA andMask<>+0xa0(SB)/8, $0xffffffffffffffff
DATA andMask<>+0xa8(SB)/8, $0x0000000000ffffff
DATA andMask<>+0xb0(SB)/8, $0xffffffffffffffff
DATA andMask<>+0xb8(SB)/8, $0x00000000ffffffff
DATA andMask<>+0xc0(SB)/8, $0xffffffffffffffff
DATA andMask<>+0xc8(SB)/8, $0x000000ffffffffff
DATA andMask<>+0xd0(SB)/8, $0xffffffffffffffff
DATA andMask<>+0xd8(SB)/8, $0x0000ffffffffffff
DATA andMask<>+0xe0(SB)/8, $0xffffffffffffffff
DATA andMask<>+0xe8(SB)/8, $0x00ffffffffffffff
GLOBL andMask<>(SB), (NOPTR+RODATA), $240
// func gcmAesDec(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
// Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesDec decrypts len(src) bytes from src into dst in counter mode and
// folds the ciphertext (src) into the hash value at T (read on entry, written
// on exit). Because the hash input is read straight from src, no parking area
// is needed and hashing happens in the same pass as decryption.
//
// Register use:
//   DI = productTable, SI = dst cursor, DX = src cursor, R9 = bytes left
//   CX = ctr, R8 = T, AX = ks, R13 = len(ks)/4 - 1 (selects the round count)
//   R10 = running 32-bit counter in host byte order
//   R12 = byte-swapped last word of round key 0, R11 = scratch
//   X8/X9/X10 = low/high/middle hash products (X8 is the accumulator)
//   X14 = gcmPoly, X15 = bswapMask
//
// Stack frame (128 bytes): eight counter blocks, already XORed with round key 0.
TEXT ·gcmAesDec(SB), $128-96
MOVQ productTable+0(FP), DI
MOVQ dst_base+8(FP), SI
MOVQ src_base+32(FP), DX
MOVQ src_len+40(FP), R9
MOVQ ctr+56(FP), CX
MOVQ T+64(FP), R8
MOVQ ks_base+72(FP), AX
MOVQ ks_len+80(FP), R13
SHRQ $0x02, R13
DECQ R13
MOVOU bswapMask<>+0(SB), X15
MOVOU gcmPoly<>+0(SB), X14
MOVOU (R8), X8
PXOR X9, X9
PXOR X10, X10
// Counter setup. X11 = counter block XOR round key 0 is the template for every
// slot; each new counter is formed as bswap((counter+1) XOR bswap(key word))
// and patched over the slot's last dword. The counter is incremented before
// its first use.
MOVOU (CX), X0
MOVL 12(CX), R10
MOVOU (AX), X11
MOVL 12(AX), R12
BSWAPL R10
BSWAPL R12
PXOR X0, X11
MOVOU X11, (SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 12(SP)
// Fewer than 128 bytes: go straight to the one-block loop.
CMPQ R9, $0x80
JB gcmAesDecSingles
// Fill counter slots 1..7.
MOVOU X11, 16(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 28(SP)
MOVOU X11, 32(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 44(SP)
MOVOU X11, 48(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 60(SP)
MOVOU X11, 64(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 76(SP)
MOVOU X11, 80(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 92(SP)
MOVOU X11, 96(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 108(SP)
MOVOU X11, 112(SP)
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 124(SP)
// Eight blocks per iteration: the AES rounds on the counters are interleaved
// with hashing the eight ciphertext blocks read from src.
gcmAesDecOctetsLoop:
CMPQ R9, $0x80
JB gcmAesDecEndOctets
SUBQ $0x80, R9
MOVOU (SP), X0
MOVOU 16(SP), X1
MOVOU 32(SP), X2
MOVOU 48(SP), X3
MOVOU 64(SP), X4
MOVOU 80(SP), X5
MOVOU 96(SP), X6
MOVOU 112(SP), X7
// Ciphertext block 0: byte-reverse, fold in the accumulator, multiply by
// table pair 0/16 to start the three partial products.
MOVOU (DX), X11
PSHUFB X15, X11
PXOR X8, X11
PSHUFD $0x4e, X11, X12
PXOR X11, X12
MOVOU (DI), X8
MOVOU 16(DI), X10
MOVOU X8, X9
PCLMULQDQ $0x00, X12, X10
PCLMULQDQ $0x00, X11, X8
PCLMULQDQ $0x11, X11, X9
// Combined rounds 1..7: AES round i on all eight counters, ciphertext block i
// multiplied by table pair 32*i / 32*i+16, and counter slot i-1 refreshed.
MOVOU 16(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 32(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 16(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 48(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 12(SP)
MOVOU 32(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 64(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 32(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 80(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 28(SP)
MOVOU 48(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 96(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 48(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 112(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 44(SP)
MOVOU 64(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 128(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 64(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 144(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 60(SP)
MOVOU 80(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 160(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 80(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 176(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 76(SP)
MOVOU 96(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 192(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 96(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 208(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 92(SP)
MOVOU 112(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
MOVOU 224(DI), X12
MOVOU X12, X13
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 112(DX), X11
PSHUFB X15, X11
PCLMULQDQ $0x00, X11, X12
PXOR X12, X8
PSHUFD $0x4e, X11, X12
PCLMULQDQ $0x11, X11, X13
PXOR X12, X11
PXOR X13, X9
MOVOU 240(DI), X13
PCLMULQDQ $0x00, X13, X11
PXOR X11, X10
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 108(SP)
// Round 8 is a plain AES round, followed by the refresh of the last slot.
MOVOU 128(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 124(SP)
// Merge the middle product into low/high, then reduce in two folding steps
// with AES round 9 scheduled between them.
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU 144(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
// Round-count dispatch: R13 < 12 finishes with round key 10, R13 == 12 with
// round key 12, otherwise round key 14. The JE reuses the flags from this
// CMPQ; AESENC and MOVOU do not modify them.
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB decLast1
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 176(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 192(AX), X11
JE decLast1
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 208(AX), X11
AESENC X11, X0
AESENC X11, X1
AESENC X11, X2
AESENC X11, X3
AESENC X11, X4
AESENC X11, X5
AESENC X11, X6
AESENC X11, X7
MOVOU 224(AX), X11
decLast1:
AESENCLAST X11, X0
AESENCLAST X11, X1
AESENCLAST X11, X2
AESENCLAST X11, X3
AESENCLAST X11, X4
AESENCLAST X11, X5
AESENCLAST X11, X6
AESENCLAST X11, X7
// XOR the keystream with the ciphertext. All eight src blocks are read before
// the first dst store below.
MOVOU (DX), X11
PXOR X11, X0
MOVOU 16(DX), X11
PXOR X11, X1
MOVOU 32(DX), X11
PXOR X11, X2
MOVOU 48(DX), X11
PXOR X11, X3
MOVOU 64(DX), X11
PXOR X11, X4
MOVOU 80(DX), X11
PXOR X11, X5
MOVOU 96(DX), X11
PXOR X11, X6
MOVOU 112(DX), X11
PXOR X11, X7
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, 32(SI)
MOVOU X3, 48(SI)
MOVOU X4, 64(SI)
MOVOU X5, 80(SI)
MOVOU X6, 96(SI)
MOVOU X7, 112(SI)
LEAQ 128(SI), SI
LEAQ 128(DX), DX
JMP gcmAesDecOctetsLoop
// R10 currently matches counter slot 7; the code below only uses slot 0,
// whose value is seven lower, so rewind before continuing to increment.
gcmAesDecEndOctets:
SUBQ $0x07, R10
// One block at a time. Round keys 1..7 stay resident in X1..X7 and the last
// table entry in X13.
gcmAesDecSingles:
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 224(DI), X13
gcmAesDecSinglesLoop:
CMPQ R9, $0x10
JB gcmAesDecTail
SUBQ $0x10, R9
// Load the ciphertext block, keep an untouched copy in X12 for the final XOR,
// and hash the byte-reversed copy with table pair 224/240.
MOVOU (DX), X0
MOVOU X0, X12
PSHUFB X15, X0
PXOR X8, X0
MOVOU X13, X8
MOVOU X13, X9
MOVOU 240(DI), X10
PCLMULQDQ $0x00, X0, X8
PCLMULQDQ $0x11, X0, X9
PSHUFD $0x4e, X0, X11
PXOR X0, X11
PCLMULQDQ $0x00, X11, X10
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
// Take the current counter block, then immediately prepare the next one.
MOVOU (SP), X0
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 12(SP)
AESENC X1, X0
AESENC X2, X0
AESENC X3, X0
AESENC X4, X0
AESENC X5, X0
AESENC X6, X0
AESENC X7, X0
MOVOU 128(AX), X11
AESENC X11, X0
MOVOU 144(AX), X11
AESENC X11, X0
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB decLast2
AESENC X11, X0
MOVOU 176(AX), X11
AESENC X11, X0
MOVOU 192(AX), X11
JE decLast2
AESENC X11, X0
MOVOU 208(AX), X11
AESENC X11, X0
MOVOU 224(AX), X11
decLast2:
AESENCLAST X11, X0
PXOR X12, X0
MOVOU X0, (SI)
LEAQ 16(SI), SI
LEAQ 16(DX), DX
JMP gcmAesDecSinglesLoop
// Final partial block (1..15 bytes), if any.
gcmAesDecTail:
TESTQ R9, R9
JE gcmAesDecDone
// X12 = andMask entry number R9 (low R9 bytes set). R10 is reused as the
// table base, so it no longer holds the counter from here on.
MOVQ R9, R11
SHLQ $0x04, R11
LEAQ andMask<>+0(SB), R10
MOVOU -16(R10)(R11*1), X12
// A full 16 bytes are read from src and the surplus is masked off.
// NOTE(review): this relies on at least 16 readable bytes at this position
// (presumably the tag follows the ciphertext) - confirm against callers.
MOVOU (DX), X0
PAND X12, X0
MOVOU X0, X12
PSHUFB X15, X0
PXOR X8, X0
MOVOU 224(DI), X8
MOVOU 240(DI), X10
MOVOU X8, X9
PCLMULQDQ $0x00, X0, X8
PCLMULQDQ $0x11, X0, X9
PSHUFD $0x4e, X0, X11
PXOR X0, X11
PCLMULQDQ $0x00, X11, X10
PXOR X8, X10
PXOR X9, X10
MOVOU X10, X11
PSRLDQ $0x08, X10
PSLLDQ $0x08, X11
PXOR X10, X9
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
MOVOU X14, X11
PCLMULQDQ $0x01, X8, X11
PSHUFD $0x4e, X8, X8
PXOR X11, X8
PXOR X9, X8
// The counter block is loaded before the slot update below. That update is
// computed from the andMask address now held in R10, so the value stored at
// 12(SP) is meaningless; nothing reads it afterwards.
MOVOU (SP), X0
ADDL $0x01, R10
MOVL R10, R11
XORL R12, R11
BSWAPL R11
MOVL R11, 12(SP)
AESENC X1, X0
AESENC X2, X0
AESENC X3, X0
AESENC X4, X0
AESENC X5, X0
AESENC X6, X0
AESENC X7, X0
MOVOU 128(AX), X11
AESENC X11, X0
MOVOU 144(AX), X11
AESENC X11, X0
MOVOU 160(AX), X11
CMPQ R13, $0x0c
JB decLast3
AESENC X11, X0
MOVOU 176(AX), X11
AESENC X11, X0
MOVOU 192(AX), X11
JE decLast3
AESENC X11, X0
MOVOU 208(AX), X11
AESENC X11, X0
MOVOU 224(AX), X11
decLast3:
AESENCLAST X11, X0
PXOR X12, X0
// Write exactly R9 plaintext bytes, one at a time, so dst is not overrun.
ptxStoreLoop:
PEXTRB $0x00, X0, (SI)
PSRLDQ $0x01, X0
LEAQ 1(SI), SI
DECQ R9
JNE ptxStoreLoop
// Write the updated hash value back to T.
gcmAesDecDone:
MOVOU X8, (R8)
RET
// The pages are generated with Golds v0.7.3-preview. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PRs and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.