// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT.

//go:build gc && !purego

// func update(state *macState, msg []byte)
//
// update folds msg into the three-word accumulator held at the start of *state.
// It reads five 64-bit words from state (offsets 0, 8, 16, 24, 32) and writes
// back only the first three (offsets 0, 8, 16).
//
// For every full 16-byte chunk of msg, the chunk is added to the accumulator as
// a 128-bit little-endian integer with an extra bit at 2^128; the accumulator
// is then multiplied by the two-word value loaded from state offsets 24 and 32,
// and everything at or above bit 130 of the product is folded back in
// multiplied by 5 (which preserves the value modulo 2^130 - 5). A trailing
// chunk of 1..15 bytes is handled the same way, except that a single 0x01 byte
// is placed directly above its last byte instead of the 2^128 bit.
//
// Frame: $0-32 = no local stack frame, 32 bytes of arguments (one pointer plus
// a three-word slice header; the slice capacity word is never read).
//
// Register roles:
//   DI              state pointer
//   SI              read cursor into msg
//   R15             number of msg bytes still to process
//   R8, R9, R10     accumulator words, low to high (called h0, h1, h2 below)
//   R11, R12        multiplier words, low and high (called r0, r1 below); never modified
//   BX, CX, R13, R14  scratch; hold product words t0..t3 during multiply
//   AX, DX          implicit MULQ operand / result
//
// NOTE(review): h0..h2 / r0, r1 are presumably the Poly1305 accumulator h and
// clamped key half r; the layout of macState is not visible in this file —
// confirm against its Go declaration.
TEXT ·update(SB), $0-32
	// Load the arguments.
	MOVQ state+0(FP), DI
	MOVQ msg_base+8(FP), SI
	MOVQ msg_len+16(FP), R15
	// Load the five state words into registers.
	MOVQ (DI), R8 // h0
	MOVQ 8(DI), R9 // h1
	MOVQ 16(DI), R10 // h2
	MOVQ 24(DI), R11 // r0
	MOVQ 32(DI), R12 // r1
	// Fewer than 16 bytes (unsigned compare): skip straight to the tail handling.
	CMPQ R15, $0x10
	JB   bytes_between_0_and_15

loop:
	// h += (16 message bytes as a little-endian 128-bit integer) + 2^128.
	// The $0x01 added to h2 is the 2^128 bit; the ADCQ chain carries across words.
	ADDQ (SI), R8
	ADCQ 8(SI), R9
	ADCQ $0x01, R10
	// Advance the cursor by one chunk (LEAQ does not modify flags).
	LEAQ 16(SI), SI

multiply:
	// Schoolbook multiplication of h = (h2:h1:h0) by r = (r1:r0) into four
	// words t0..t3 = BX, CX, R13, R14. Each MULQ multiplies AX by its operand
	// and leaves the 128-bit result in DX:AX.

	// r0 * h0 -> t0 (low), t1 (high).
	MOVQ  R11, AX
	MULQ  R8
	MOVQ  AX, BX
	MOVQ  DX, CX
	// r0 * h1: low word into t1, carry folded into the high word in DX.
	MOVQ  R11, AX
	MULQ  R9
	ADDQ  AX, CX
	ADCQ  $0x00, DX
	// t2 = r0 * h2 + high word of r0 * h1. IMULQ keeps only the low 64 bits of
	// the product.
	MOVQ  R11, R13
	IMULQ R10, R13
	ADDQ  DX, R13
	// r1 * h0: low word into t1, carry folded into the high word in DX.
	MOVQ  R12, AX
	MULQ  R8
	ADDQ  AX, CX
	ADCQ  $0x00, DX
	// h0 has been fully consumed, so R8 is reused to park the high word of
	// r1 * h0 across the next MULQ (which overwrites DX).
	MOVQ  DX, R8
	// t3 = r1 * h2 (low 64 bits only).
	MOVQ  R12, R14
	IMULQ R10, R14
	// r1 * h1: low word into t2, high word plus carry into t3.
	MOVQ  R12, AX
	MULQ  R9
	ADDQ  AX, R13
	ADCQ  DX, R14
	// Add the parked high word of r1 * h0 into t2, carrying into t3.
	ADDQ  R8, R13
	ADCQ  $0x00, R14

	// Reduction. Let c = (t3:t2) >> 2, i.e. the part of the product at or above
	// bit 130. The new h is (product mod 2^130) + 4*c + c.
	// h0 = t0, h1 = t1, h2 = low two bits of t2.
	MOVQ  BX, R8
	MOVQ  CX, R9
	MOVQ  R13, R10
	ANDQ  $0x03, R10
	// (t3 : t2 with its low two bits cleared) equals 4*c; add it to (h1:h0),
	// carrying into h2.
	MOVQ  R13, BX
	ANDQ  $-4, BX
	ADDQ  BX, R8
	ADCQ  R14, R9
	ADCQ  $0x00, R10
	// Shift the 128-bit value (t3:t2) right by two to get c: the three-operand
	// SHRQ shifts R13 right, filling its top bits from the low bits of R14.
	SHRQ  $0x02, R14, R13
	SHRQ  $0x02, R14
	// Add c to (h1:h0), carrying into h2.
	ADDQ  R13, R8
	ADCQ  R14, R9
	ADCQ  $0x00, R10
	// One chunk consumed; keep looping while at least 16 bytes remain (unsigned).
	SUBQ  $0x10, R15
	CMPQ  R15, $0x10
	JAE   loop

bytes_between_0_and_15:
	// Nothing left: store the accumulator and return.
	TESTQ R15, R15
	JZ    done
	// Build the final partial chunk in CX:BX. BX starts as 1, which ends up as
	// the 0x01 byte immediately above the last message byte once every byte
	// has been shifted in below it.
	MOVQ  $0x00000001, BX
	XORQ  CX, CX
	// Clear R13 so that the byte loads below leave its upper bits zero.
	XORQ  R13, R13
	// Point SI one past the last message byte; the bytes are consumed back to front.
	ADDQ  R15, SI

flush_buffer:
	// (CX:BX) <<= 8: the three-operand SHLQ shifts CX left, filling its low
	// bits from the top bits of BX.
	SHLQ $0x08, BX, CX
	SHLQ $0x08, BX
	// Insert the next byte (walking backwards) into the now-empty low byte of BX.
	MOVB -1(SI), R13
	XORQ R13, BX
	DECQ SI
	DECQ R15
	JNZ  flush_buffer
	// h += padded partial chunk (no 2^128 bit on this path).
	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0x00, R10
	// Pretend exactly one chunk remains so that, after the shared
	// multiply/reduce code subtracts 16, R15 is 0 and control falls through
	// bytes_between_0_and_15 to done.
	MOVQ $0x00000010, R15
	JMP  multiply

done:
	// Write the three accumulator words back; state offsets 24 and 32 are left untouched.
	MOVQ R8, (DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	RET