// Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// Helper macros for blockAMD64. Every macro expands to straight-line code, so
// the function body below is still a fully unrolled sequence of 80 rounds.
//
// Register roles shared by all helpers:
//   SI       pointer to the 64-byte block currently being processed
//   (SP)     16-entry circular buffer of 32-bit schedule words (64 bytes)
//   R10      schedule word consumed by the current round
//   R9       result of the round's boolean function
//   R8       scratch

// SHA1_LOAD fetches 32-bit word `index` of the current block, byte-swaps it
// into R10 and records it in slot `index` of the schedule buffer.
#define SHA1_LOAD(index) \
	MOVL   (index*4)(SI), R10; \
	BSWAPL R10; \
	MOVL   R10, (index*4)(SP)

// SHA1_SCHED derives schedule word `index` in place:
//   w[i&15] = rol(w[i&15] ^ w[(i-3)&15] ^ w[(i-8)&15] ^ w[(i-14)&15], 1)
// The new word is also left in R10.
#define SHA1_SCHED(index) \
	MOVL (((index)&0xf)*4)(SP), R10; \
	XORL (((index-3)&0xf)*4)(SP), R10; \
	XORL (((index-8)&0xf)*4)(SP), R10; \
	XORL (((index-14)&0xf)*4)(SP), R10; \
	ROLL $1, R10; \
	MOVL R10, (((index)&0xf)*4)(SP)

// SHA1_CH leaves ((c ^ d) & b) ^ d in R9: bits of c where b is set, bits of d
// elsewhere.
#define SHA1_CH(b, c, d) \
	MOVL d, R9; \
	XORL c, R9; \
	ANDL b, R9; \
	XORL d, R9

// SHA1_PARITY leaves b ^ c ^ d in R9.
#define SHA1_PARITY(b, c, d) \
	MOVL b, R9; \
	XORL c, R9; \
	XORL d, R9

// SHA1_MAJ leaves ((b | c) & d) | (b & c) in R9, using R8 as a temporary.
#define SHA1_MAJ(b, c, d) \
	MOVL b, R8; \
	ORL  c, R8; \
	ANDL d, R8; \
	MOVL b, R9; \
	ANDL c, R9; \
	ORL  R8, R9

// SHA1_STEP finishes a round:
//   b  = rol(b, 30)
//   e += R9 + rol(a, 5) + R10 + k
#define SHA1_STEP(a, b, e, k) \
	ROLL $30, b; \
	ADDL R9, e; \
	MOVL a, R8; \
	ROLL $5, R8; \
	LEAL k(e)(R10*1), e; \
	ADDL R8, e

// Rounds 0-15: schedule word comes straight from the input block.
#define SHA1_ROUND_A_LOAD(a, b, c, d, e, index) \
	SHA1_LOAD(index); \
	SHA1_CH(b, c, d); \
	SHA1_STEP(a, b, e, 0x5A827999)

// Rounds 16-19: same boolean function and constant, derived schedule word.
#define SHA1_ROUND_A(a, b, c, d, e, index) \
	SHA1_SCHED(index); \
	SHA1_CH(b, c, d); \
	SHA1_STEP(a, b, e, 0x5A827999)

// Rounds 20-39.
#define SHA1_ROUND_B(a, b, c, d, e, index) \
	SHA1_SCHED(index); \
	SHA1_PARITY(b, c, d); \
	SHA1_STEP(a, b, e, 0x6ED9EBA1)

// Rounds 40-59.
#define SHA1_ROUND_C(a, b, c, d, e, index) \
	SHA1_SCHED(index); \
	SHA1_MAJ(b, c, d); \
	SHA1_STEP(a, b, e, 0x8F1BBCDC)

// Rounds 60-79.
#define SHA1_ROUND_D(a, b, c, d, e, index) \
	SHA1_SCHED(index); \
	SHA1_PARITY(b, c, d); \
	SHA1_STEP(a, b, e, 0xCA62C1D6)

// func blockAMD64(dig *digest, p []byte)
//
// Folds every complete 64-byte block of p into the five 32-bit state words
// stored at offsets 0, 4, 8, 12 and 16 of dig. Trailing bytes that do not fill
// a whole block are ignored (the length is rounded down to a multiple of 64).
//
// Register use inside the loop:
//   AX, BX, CX, DX, BP   working state words; their roles rotate every round
//   R11..R15             copy of the state taken at the start of the block
//   SI / DI              current block pointer / end of the last whole block
//   R8, R9, R10          scratch (see the helper macros)
//   0..63(SP)            schedule buffer
TEXT ·blockAMD64(SB), NOSPLIT, $64-32
	MOVQ dig+0(FP), BP
	MOVQ p_base+8(FP), SI
	MOVQ p_len+16(FP), DX

	// DX = len(p) rounded down to a multiple of 64; DI = SI + DX.
	SHRQ $6, DX
	SHLQ $6, DX
	LEAQ (SI)(DX*1), DI

	// Load the state. BP is overwritten last because it holds dig until then.
	MOVL (0*4)(BP), AX
	MOVL (1*4)(BP), BX
	MOVL (2*4)(BP), CX
	MOVL (3*4)(BP), DX
	MOVL (4*4)(BP), BP

	// Nothing to do when there is no complete block.
	CMPQ SI, DI
	JEQ  done

next_block:
	// Remember the incoming state so it can be added back after 80 rounds.
	MOVL AX, R11
	MOVL BX, R12
	MOVL CX, R13
	MOVL DX, R14
	MOVL BP, R15

	SHA1_ROUND_A_LOAD(AX, BX, CX, DX, BP, 0)
	SHA1_ROUND_A_LOAD(BP, AX, BX, CX, DX, 1)
	SHA1_ROUND_A_LOAD(DX, BP, AX, BX, CX, 2)
	SHA1_ROUND_A_LOAD(CX, DX, BP, AX, BX, 3)
	SHA1_ROUND_A_LOAD(BX, CX, DX, BP, AX, 4)
	SHA1_ROUND_A_LOAD(AX, BX, CX, DX, BP, 5)
	SHA1_ROUND_A_LOAD(BP, AX, BX, CX, DX, 6)
	SHA1_ROUND_A_LOAD(DX, BP, AX, BX, CX, 7)
	SHA1_ROUND_A_LOAD(CX, DX, BP, AX, BX, 8)
	SHA1_ROUND_A_LOAD(BX, CX, DX, BP, AX, 9)
	SHA1_ROUND_A_LOAD(AX, BX, CX, DX, BP, 10)
	SHA1_ROUND_A_LOAD(BP, AX, BX, CX, DX, 11)
	SHA1_ROUND_A_LOAD(DX, BP, AX, BX, CX, 12)
	SHA1_ROUND_A_LOAD(CX, DX, BP, AX, BX, 13)
	SHA1_ROUND_A_LOAD(BX, CX, DX, BP, AX, 14)
	SHA1_ROUND_A_LOAD(AX, BX, CX, DX, BP, 15)

	SHA1_ROUND_A(BP, AX, BX, CX, DX, 16)
	SHA1_ROUND_A(DX, BP, AX, BX, CX, 17)
	SHA1_ROUND_A(CX, DX, BP, AX, BX, 18)
	SHA1_ROUND_A(BX, CX, DX, BP, AX, 19)

	SHA1_ROUND_B(AX, BX, CX, DX, BP, 20)
	SHA1_ROUND_B(BP, AX, BX, CX, DX, 21)
	SHA1_ROUND_B(DX, BP, AX, BX, CX, 22)
	SHA1_ROUND_B(CX, DX, BP, AX, BX, 23)
	SHA1_ROUND_B(BX, CX, DX, BP, AX, 24)
	SHA1_ROUND_B(AX, BX, CX, DX, BP, 25)
	SHA1_ROUND_B(BP, AX, BX, CX, DX, 26)
	SHA1_ROUND_B(DX, BP, AX, BX, CX, 27)
	SHA1_ROUND_B(CX, DX, BP, AX, BX, 28)
	SHA1_ROUND_B(BX, CX, DX, BP, AX, 29)
	SHA1_ROUND_B(AX, BX, CX, DX, BP, 30)
	SHA1_ROUND_B(BP, AX, BX, CX, DX, 31)
	SHA1_ROUND_B(DX, BP, AX, BX, CX, 32)
	SHA1_ROUND_B(CX, DX, BP, AX, BX, 33)
	SHA1_ROUND_B(BX, CX, DX, BP, AX, 34)
	SHA1_ROUND_B(AX, BX, CX, DX, BP, 35)
	SHA1_ROUND_B(BP, AX, BX, CX, DX, 36)
	SHA1_ROUND_B(DX, BP, AX, BX, CX, 37)
	SHA1_ROUND_B(CX, DX, BP, AX, BX, 38)
	SHA1_ROUND_B(BX, CX, DX, BP, AX, 39)

	SHA1_ROUND_C(AX, BX, CX, DX, BP, 40)
	SHA1_ROUND_C(BP, AX, BX, CX, DX, 41)
	SHA1_ROUND_C(DX, BP, AX, BX, CX, 42)
	SHA1_ROUND_C(CX, DX, BP, AX, BX, 43)
	SHA1_ROUND_C(BX, CX, DX, BP, AX, 44)
	SHA1_ROUND_C(AX, BX, CX, DX, BP, 45)
	SHA1_ROUND_C(BP, AX, BX, CX, DX, 46)
	SHA1_ROUND_C(DX, BP, AX, BX, CX, 47)
	SHA1_ROUND_C(CX, DX, BP, AX, BX, 48)
	SHA1_ROUND_C(BX, CX, DX, BP, AX, 49)
	SHA1_ROUND_C(AX, BX, CX, DX, BP, 50)
	SHA1_ROUND_C(BP, AX, BX, CX, DX, 51)
	SHA1_ROUND_C(DX, BP, AX, BX, CX, 52)
	SHA1_ROUND_C(CX, DX, BP, AX, BX, 53)
	SHA1_ROUND_C(BX, CX, DX, BP, AX, 54)
	SHA1_ROUND_C(AX, BX, CX, DX, BP, 55)
	SHA1_ROUND_C(BP, AX, BX, CX, DX, 56)
	SHA1_ROUND_C(DX, BP, AX, BX, CX, 57)
	SHA1_ROUND_C(CX, DX, BP, AX, BX, 58)
	SHA1_ROUND_C(BX, CX, DX, BP, AX, 59)

	SHA1_ROUND_D(AX, BX, CX, DX, BP, 60)
	SHA1_ROUND_D(BP, AX, BX, CX, DX, 61)
	SHA1_ROUND_D(DX, BP, AX, BX, CX, 62)
	SHA1_ROUND_D(CX, DX, BP, AX, BX, 63)
	SHA1_ROUND_D(BX, CX, DX, BP, AX, 64)
	SHA1_ROUND_D(AX, BX, CX, DX, BP, 65)
	SHA1_ROUND_D(BP, AX, BX, CX, DX, 66)
	SHA1_ROUND_D(DX, BP, AX, BX, CX, 67)
	SHA1_ROUND_D(CX, DX, BP, AX, BX, 68)
	SHA1_ROUND_D(BX, CX, DX, BP, AX, 69)
	SHA1_ROUND_D(AX, BX, CX, DX, BP, 70)
	SHA1_ROUND_D(BP, AX, BX, CX, DX, 71)
	SHA1_ROUND_D(DX, BP, AX, BX, CX, 72)
	SHA1_ROUND_D(CX, DX, BP, AX, BX, 73)
	SHA1_ROUND_D(BX, CX, DX, BP, AX, 74)
	SHA1_ROUND_D(AX, BX, CX, DX, BP, 75)
	SHA1_ROUND_D(BP, AX, BX, CX, DX, 76)
	SHA1_ROUND_D(DX, BP, AX, BX, CX, 77)
	SHA1_ROUND_D(CX, DX, BP, AX, BX, 78)
	SHA1_ROUND_D(BX, CX, DX, BP, AX, 79)

	// Add the state saved at the top of the block back into the working words.
	ADDL R11, AX
	ADDL R12, BX
	ADDL R13, CX
	ADDL R14, DX
	ADDL R15, BP

	// Advance to the next 64-byte block; keep going until SI reaches DI.
	ADDQ $64, SI
	CMPQ SI, DI
	JB   next_block

done:
	// BP was reused as a state word, so reload dig before writing back.
	MOVQ dig+0(FP), DI
	MOVL AX, (0*4)(DI)
	MOVL BX, (1*4)(DI)
	MOVL CX, (2*4)(DI)
	MOVL DX, (3*4)(DI)
	MOVL BP, (4*4)(DI)
	RET

// Keep the helper macros private to blockAMD64.
#undef SHA1_ROUND_D
#undef SHA1_ROUND_C
#undef SHA1_ROUND_B
#undef SHA1_ROUND_A
#undef SHA1_ROUND_A_LOAD
#undef SHA1_STEP
#undef SHA1_MAJ
#undef SHA1_PARITY
#undef SHA1_CH
#undef SHA1_SCHED
#undef SHA1_LOAD

// func blockAVX2(dig *digest, p []byte)
// Requires: AVX, AVX2, BMI, BMI2, CMOV
TEXT ·blockAVX2(SB), $1408-32
	MOVQ        dig+0(FP), DI
	MOVQ        p_base+8(FP), SI
	MOVQ        p_len+16(FP), DX
	SHRQ        $0x06, DX
	SHLQ        $0x06, DX
	LEAQ        K_XMM_AR<>+0(SB), R8
	MOVQ        DI, R9
	MOVQ        SI, R10
	LEAQ        64(SI), R13
	ADDQ        SI, DX
	ADDQ        $0x40, DX
	MOVQ        DX, R11
	CMPQ        R13, R11
	CMOVQCC     R8, R13
	VMOVDQU     BSWAP_SHUFB_CTL<>+0(SB), Y10
	MOVL        (R9), CX
	MOVL        4(R9), SI
	MOVL        8(R9), DI
	MOVL        12(R9), AX
	MOVL        16(R9), DX
	MOVQ        SP, R14
	LEAQ        672(SP), R15
	VMOVDQU     (R10), X0
	VINSERTI128 $0x01, (R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y15
	VPADDD      (R8), Y15, Y0
	VMOVDQU     Y0, (R14)
	VMOVDQU     16(R10), X0
	VINSERTI128 $0x01, 16(R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y14
	VPADDD      (R8), Y14, Y0
	VMOVDQU     Y0, 32(R14)
	VMOVDQU     32(R10), X0
	VINSERTI128 $0x01, 32(R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y13
	VPADDD      (R8), Y13, Y0
	VMOVDQU     Y0, 64(R14)
	VMOVDQU     48(R10), X0
	VINSERTI128 $0x01, 48(R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y12
	VPADDD      (R8), Y12, Y0
	VMOVDQU     Y0, 96(R14)
	VPALIGNR    $0x08, Y15, Y14, Y8
	VPSRLDQ     $0x04, Y12, Y0
	VPXOR       Y13, Y8, Y8
	VPXOR       Y15, Y0, Y0
	VPXOR       Y0, Y8, Y8
	VPSLLDQ     $0x0c, Y8, Y9
	VPSLLD      $0x01, Y8, Y0
	VPSRLD      $0x1f, Y8, Y8
	VPOR        Y8, Y0, Y0
	VPSLLD      $0x02, Y9, Y8
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y8, Y0, Y0
	VPXOR       Y9, Y0, Y8
	VPADDD      (R8), Y8, Y0
	VMOVDQU     Y0, 128(R14)
	VPALIGNR    $0x08, Y14, Y13, Y7
	VPSRLDQ     $0x04, Y8, Y0
	VPXOR       Y12, Y7, Y7
	VPXOR       Y14, Y0, Y0
	VPXOR       Y0, Y7, Y7
	VPSLLDQ     $0x0c, Y7, Y9
	VPSLLD      $0x01, Y7, Y0
	VPSRLD      $0x1f, Y7, Y7
	VPOR        Y7, Y0, Y0
	VPSLLD      $0x02, Y9, Y7
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y7, Y0, Y0
	VPXOR       Y9, Y0, Y7
	VPADDD      32(R8), Y7, Y0
	VMOVDQU     Y0, 160(R14)
	VPALIGNR    $0x08, Y13, Y12, Y5
	VPSRLDQ     $0x04, Y7, Y0
	VPXOR       Y8, Y5, Y5
	VPXOR       Y13, Y0, Y0
	VPXOR       Y0, Y5, Y5
	VPSLLDQ     $0x0c, Y5, Y9
	VPSLLD      $0x01, Y5, Y0
	VPSRLD      $0x1f, Y5, Y5
	VPOR        Y5, Y0, Y0
	VPSLLD      $0x02, Y9, Y5
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y5, Y0, Y0
	VPXOR       Y9, Y0, Y5
	VPADDD      32(R8), Y5, Y0
	VMOVDQU     Y0, 192(R14)
	VPALIGNR    $0x08, Y12, Y8, Y3
	VPSRLDQ     $0x04, Y5, Y0
	VPXOR       Y7, Y3, Y3
	VPXOR       Y12, Y0, Y0
	VPXOR       Y0, Y3, Y3
	VPSLLDQ     $0x0c, Y3, Y9
	VPSLLD      $0x01, Y3, Y0
	VPSRLD      $0x1f, Y3, Y3
	VPOR        Y3, Y0, Y0
	VPSLLD      $0x02, Y9, Y3
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y3, Y0, Y0
	VPXOR       Y9, Y0, Y3
	VPADDD      32(R8), Y3, Y0
	VMOVDQU     Y0, 224(R14)
	VPALIGNR    $0x08, Y5, Y3, Y0
	VPXOR       Y14, Y15, Y15
	VPXOR       Y8, Y0, Y0
	VPXOR       Y0, Y15, Y15
	VPSLLD      $0x02, Y15, Y0
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	VPADDD      32(R8), Y15, Y0
	VMOVDQU     Y0, 256(R14)
	VPALIGNR    $0x08, Y3, Y15, Y0
	VPXOR       Y13, Y14, Y14
	VPXOR       Y7, Y0, Y0
	VPXOR       Y0, Y14, Y14
	VPSLLD      $0x02, Y14, Y0
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	VPADDD      32(R8), Y14, Y0
	VMOVDQU     Y0, 288(R14)
	VPALIGNR    $0x08, Y15, Y14, Y0
	VPXOR       Y12, Y13, Y13
	VPXOR       Y5, Y0, Y0
	VPXOR       Y0, Y13, Y13
	VPSLLD      $0x02, Y13, Y0
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	VPADDD      64(R8), Y13, Y0
	VMOVDQU     Y0, 320(R14)
	VPALIGNR    $0x08, Y14, Y13, Y0
	VPXOR       Y8, Y12, Y12
	VPXOR       Y3, Y0, Y0
	VPXOR       Y0, Y12, Y12
	VPSLLD      $0x02, Y12, Y0
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	VPADDD      64(R8), Y12, Y0
	VMOVDQU     Y0, 352(R14)
	VPALIGNR    $0x08, Y13, Y12, Y0
	VPXOR       Y7, Y8, Y8
	VPXOR       Y15, Y0, Y0
	VPXOR       Y0, Y8, Y8
	VPSLLD      $0x02, Y8, Y0
	VPSRLD      $0x1e, Y8, Y8
	VPOR        Y8, Y0, Y8
	VPADDD      64(R8), Y8, Y0
	VMOVDQU     Y0, 384(R14)
	VPALIGNR    $0x08, Y12, Y8, Y0
	VPXOR       Y5, Y7, Y7
	VPXOR       Y14, Y0, Y0
	VPXOR       Y0, Y7, Y7
	VPSLLD      $0x02, Y7, Y0
	VPSRLD      $0x1e, Y7, Y7
	VPOR        Y7, Y0, Y7
	VPADDD      64(R8), Y7, Y0
	VMOVDQU     Y0, 416(R14)
	VPALIGNR    $0x08, Y8, Y7, Y0
	VPXOR       Y3, Y5, Y5
	VPXOR       Y13, Y0, Y0
	VPXOR       Y0, Y5, Y5
	VPSLLD      $0x02, Y5, Y0
	VPSRLD      $0x1e, Y5, Y5
	VPOR        Y5, Y0, Y5
	VPADDD      64(R8), Y5, Y0
	VMOVDQU     Y0, 448(R14)
	VPALIGNR    $0x08, Y7, Y5, Y0
	VPXOR       Y15, Y3, Y3
	VPXOR       Y12, Y0, Y0
	VPXOR       Y0, Y3, Y3
	VPSLLD      $0x02, Y3, Y0
	VPSRLD      $0x1e, Y3, Y3
	VPOR        Y3, Y0, Y3
	VPADDD      96(R8), Y3, Y0
	VMOVDQU     Y0, 480(R14)
	VPALIGNR    $0x08, Y5, Y3, Y0
	VPXOR       Y14, Y15, Y15
	VPXOR       Y8, Y0, Y0
	VPXOR       Y0, Y15, Y15
	VPSLLD      $0x02, Y15, Y0
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	VPADDD      96(R8), Y15, Y0
	VMOVDQU     Y0, 512(R14)
	VPALIGNR    $0x08, Y3, Y15, Y0
	VPXOR       Y13, Y14, Y14
	VPXOR       Y7, Y0, Y0
	VPXOR       Y0, Y14, Y14
	VPSLLD      $0x02, Y14, Y0
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	VPADDD      96(R8), Y14, Y0
	VMOVDQU     Y0, 544(R14)
	VPALIGNR    $0x08, Y15, Y14, Y0
	VPXOR       Y12, Y13, Y13
	VPXOR       Y5, Y0, Y0
	VPXOR       Y0, Y13, Y13
	VPSLLD      $0x02, Y13, Y0
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	VPADDD      96(R8), Y13, Y0
	VMOVDQU     Y0, 576(R14)
	VPALIGNR    $0x08, Y14, Y13, Y0
	VPXOR       Y8, Y12, Y12
	VPXOR       Y3, Y0, Y0
	VPXOR       Y0, Y12, Y12
	VPSLLD      $0x02, Y12, Y0
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	VPADDD      96(R8), Y12, Y0
	VMOVDQU     Y0, 608(R14)
	XCHGQ       R15, R14

loop:
	CMPQ R10, R8
	JNE  begin
	VZEROUPPER
	RET

begin:
	MOVL        SI, BX
	RORXL       $0x02, SI, SI
	ANDNL       AX, BX, BP
	ANDL        DI, BX
	XORL        BP, BX
	ADDL        (R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VMOVDQU     128(R10), X0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        4(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VINSERTI128 $0x01, 128(R13), Y0, Y0
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        8(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSHUFB     Y10, Y0, Y15
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        12(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        32(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPADDD      (R8), Y15, Y0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        36(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        40(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        44(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VMOVDQU     Y0, (R14)
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        64(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VMOVDQU     144(R10), X0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        68(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VINSERTI128 $0x01, 144(R13), Y0, Y0
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        72(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSHUFB     Y10, Y0, Y14
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        76(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        96(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPADDD      (R8), Y14, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        100(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        104(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        108(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VMOVDQU     Y0, 32(R14)
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        128(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VMOVDQU     160(R10), X0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        132(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VINSERTI128 $0x01, 160(R13), Y0, Y0
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        136(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSHUFB     Y10, Y0, Y13
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        140(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        160(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPADDD      (R8), Y13, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        164(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        168(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        172(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VMOVDQU     Y0, 64(R14)
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        192(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VMOVDQU     176(R10), X0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        196(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VINSERTI128 $0x01, 176(R13), Y0, Y0
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        200(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSHUFB     Y10, Y0, Y12
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        204(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        224(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPADDD      (R8), Y12, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        228(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        232(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        236(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VMOVDQU     Y0, 96(R14)
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        256(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y15, Y14, Y8
	VPSRLDQ     $0x04, Y12, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        260(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y13, Y8, Y8
	VPXOR       Y15, Y0, Y0
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        264(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y0, Y8, Y8
	VPSLLDQ     $0x0c, Y8, Y9
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        268(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSLLD      $0x01, Y8, Y0
	VPSRLD      $0x1f, Y8, Y8
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        288(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPOR        Y8, Y0, Y0
	VPSLLD      $0x02, Y9, Y8
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        292(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y8, Y0, Y0
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        296(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        300(R15), SI
	VPXOR       Y9, Y0, Y8
	VPADDD      (R8), Y8, Y0
	VMOVDQU     Y0, 128(R14)
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        320(R15), BX
	VPALIGNR    $0x08, Y14, Y13, Y7
	VPSRLDQ     $0x04, Y8, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        324(R15), CX
	VPXOR       Y12, Y7, Y7
	VPXOR       Y14, Y0, Y0
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        328(R15), DX
	VPXOR       Y0, Y7, Y7
	VPSLLDQ     $0x0c, Y7, Y9
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        332(R15), AX
	VPSLLD      $0x01, Y7, Y0
	VPSRLD      $0x1f, Y7, Y7
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        352(R15), DI
	VPOR        Y7, Y0, Y0
	VPSLLD      $0x02, Y9, Y7
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        356(R15), SI
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y7, Y0, Y0
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        360(R15), BX
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        364(R15), CX
	VPXOR       Y9, Y0, Y7
	VPADDD      32(R8), Y7, Y0
	VMOVDQU     Y0, 160(R14)
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        384(R15), DX
	VPALIGNR    $0x08, Y13, Y12, Y5
	VPSRLDQ     $0x04, Y7, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        388(R15), AX
	VPXOR       Y8, Y5, Y5
	VPXOR       Y13, Y0, Y0
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        392(R15), DI
	VPXOR       Y0, Y5, Y5
	VPSLLDQ     $0x0c, Y5, Y9
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        396(R15), SI
	VPSLLD      $0x01, Y5, Y0
	VPSRLD      $0x1f, Y5, Y5
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        416(R15), BX
	VPOR        Y5, Y0, Y0
	VPSLLD      $0x02, Y9, Y5
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        420(R15), CX
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y5, Y0, Y0
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        424(R15), DX
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        428(R15), AX
	VPXOR       Y9, Y0, Y5
	VPADDD      32(R8), Y5, Y0
	VMOVDQU     Y0, 192(R14)
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        448(R15), DI
	VPALIGNR    $0x08, Y12, Y8, Y3
	VPSRLDQ     $0x04, Y5, Y0
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        452(R15), SI
	VPXOR       Y7, Y3, Y3
	VPXOR       Y12, Y0, Y0
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        456(R15), BX
	VPXOR       Y0, Y3, Y3
	VPSLLDQ     $0x0c, Y3, Y9
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        460(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSLLD      $0x01, Y3, Y0
	VPSRLD      $0x1f, Y3, Y3
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	// Advance R10 by 0x80 bytes and compare it with R11. If R10 is now
	// (unsigned) at or beyond R11, replace it with R8; the later
	// CMPQ R10, R8 / JE pair in this function tests for exactly that value.
	// NOTE(review): R10, R11 and R8 are initialised before this excerpt;
	// presumably R10 is an input cursor and R11 the end bound - confirm.
	ADDQ        $0x80, R10
	CMPQ        R10, R11
	CMOVQCC     R8, R10
	ADDL        480(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPOR        Y3, Y0, Y0
	VPSLLD      $0x02, Y9, Y3
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        484(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y3, Y0, Y0
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        488(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        492(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y9, Y0, Y3
	VPADDD      32(R8), Y3, Y0
	VMOVDQU     Y0, 224(R14)
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        512(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPALIGNR    $0x08, Y5, Y3, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        516(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y14, Y15, Y15
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        520(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPXOR       Y8, Y0, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        524(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y15, Y15
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        544(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y15, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        548(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        552(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        556(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPADDD      32(R8), Y15, Y0
	VMOVDQU     Y0, 256(R14)
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        576(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPALIGNR    $0x08, Y3, Y15, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        580(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y13, Y14, Y14
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        584(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPXOR       Y7, Y0, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        588(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y0, Y14, Y14
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        608(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSLLD      $0x02, Y14, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        612(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        616(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        620(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	VPADDD      32(R8), Y14, Y0
	VMOVDQU     Y0, 288(R14)
	ADDL        R12, AX
	// Fold the five working registers into the five 32-bit words stored at
	// 0(R9)..16(R9): each stored word is added into its register and the sum
	// is written back, so register and memory hold the same updated value.
	// NOTE(review): R9 is loaded before this excerpt; presumably it points at
	// the digest state words - confirm.
	ADDL        (R9), AX
	MOVL        AX, (R9)
	ADDL        4(R9), DX
	MOVL        DX, 4(R9)
	ADDL        8(R9), BX
	MOVL        BX, 8(R9)
	ADDL        12(R9), SI
	MOVL        SI, 12(R9)
	ADDL        16(R9), DI
	MOVL        DI, 16(R9)
	// Branch to loop when R10 holds the value of R8 (the CMOVQCC earlier in
	// this pass writes R8 into R10 once R10 has reached R11); otherwise fall
	// through into the next run of rounds, which reads from 16(R15) onward.
	CMPQ        R10, R8
	JE          loop
	// Prepare the operands for the next round before it starts:
	//   CX = DX rotated right by 2
	//   DX = (DX & BX) ^ (^DX & SI)
	// The second MOVL copies CX straight back into DX and so leaves DX
	// unchanged.
	MOVL        DX, CX
	MOVL        CX, DX
	RORXL       $0x02, CX, CX
	ANDNL       SI, DX, BP
	ANDL        BX, DX
	XORL        BP, DX
	ADDL        16(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y15, Y14, Y0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        20(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y12, Y13, Y13
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        24(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y5, Y0, Y0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        28(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y0, Y13, Y13
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        48(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSLLD      $0x02, Y13, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        52(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        56(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        60(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPADDD      64(R8), Y13, Y0
	VMOVDQU     Y0, 320(R14)
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        80(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPALIGNR    $0x08, Y14, Y13, Y0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        84(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y8, Y12, Y12
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        88(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPXOR       Y3, Y0, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        92(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y12, Y12
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        112(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y12, Y0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        116(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        120(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        124(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPADDD      64(R8), Y12, Y0
	VMOVDQU     Y0, 352(R14)
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        144(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPALIGNR    $0x08, Y13, Y12, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        148(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y7, Y8, Y8
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        152(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPXOR       Y15, Y0, Y0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        156(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y0, Y8, Y8
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        176(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSLLD      $0x02, Y8, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        180(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSRLD      $0x1e, Y8, Y8
	VPOR        Y8, Y0, Y8
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        184(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        188(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPADDD      64(R8), Y8, Y0
	VMOVDQU     Y0, 384(R14)
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        208(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y12, Y8, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        212(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y5, Y7, Y7
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        216(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y14, Y0, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        220(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y0, Y7, Y7
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        240(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSLLD      $0x02, Y7, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        244(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y7, Y7
	VPOR        Y7, Y0, Y7
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        248(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        252(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPADDD      64(R8), Y7, Y0
	VMOVDQU     Y0, 416(R14)
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        272(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPALIGNR    $0x08, Y8, Y7, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        276(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y3, Y5, Y5
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        280(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPXOR       Y13, Y0, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        284(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y5, Y5
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        304(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y5, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        308(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y5, Y5
	VPOR        Y5, Y0, Y5
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        312(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        316(R15), CX
	VPADDD      64(R8), Y5, Y0
	VMOVDQU     Y0, 448(R14)
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        336(R15), DX
	VPALIGNR    $0x08, Y7, Y5, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        340(R15), AX
	VPXOR       Y15, Y3, Y3
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        344(R15), DI
	VPXOR       Y12, Y0, Y0
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        348(R15), SI
	VPXOR       Y0, Y3, Y3
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        368(R15), BX
	VPSLLD      $0x02, Y3, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        372(R15), CX
	VPSRLD      $0x1e, Y3, Y3
	VPOR        Y3, Y0, Y3
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        376(R15), DX
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        380(R15), AX
	VPADDD      96(R8), Y3, Y0
	VMOVDQU     Y0, 480(R14)
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        400(R15), DI
	VPALIGNR    $0x08, Y5, Y3, Y0
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        404(R15), SI
	VPXOR       Y14, Y15, Y15
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        408(R15), BX
	VPXOR       Y8, Y0, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        412(R15), CX
	VPXOR       Y0, Y15, Y15
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        432(R15), DX
	VPSLLD      $0x02, Y15, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        436(R15), AX
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        440(R15), DI
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        444(R15), SI
	VPADDD      96(R8), Y15, Y0
	VMOVDQU     Y0, 512(R14)
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        464(R15), BX
	VPALIGNR    $0x08, Y3, Y15, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        468(R15), CX
	VPXOR       Y13, Y14, Y14
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        472(R15), DX
	VPXOR       Y7, Y0, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        476(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y14, Y14
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	// Advance R13 by 0x80 bytes and compare it with R11. If R13 is now
	// (unsigned) at or beyond R11, write R8 into R10 (note: R10, not R13),
	// the value that the CMPQ R10, R8 check in this function looks for.
	// NOTE(review): R13 and R11 are initialised before this excerpt;
	// presumably R13 is a second input cursor and R11 the end bound - confirm.
	ADDQ        $0x80, R13
	CMPQ        R13, R11
	CMOVQCC     R8, R10
	ADDL        496(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y14, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        500(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        504(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        508(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPADDD      96(R8), Y14, Y0
	VMOVDQU     Y0, 544(R14)
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        528(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPALIGNR    $0x08, Y15, Y14, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        532(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y12, Y13, Y13
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        536(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPXOR       Y5, Y0, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        540(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y0, Y13, Y13
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        560(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSLLD      $0x02, Y13, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        564(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        568(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        572(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPADDD      96(R8), Y13, Y0
	VMOVDQU     Y0, 576(R14)
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        592(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y14, Y13, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        596(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y8, Y12, Y12
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        600(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y3, Y0, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        604(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y0, Y12, Y12
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        624(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSLLD      $0x02, Y12, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        628(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        632(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        636(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	VPADDD      96(R8), Y12, Y0
	VMOVDQU     Y0, 608(R14)
	ADDL        R12, SI
	ADDL        (R9), SI
	MOVL        SI, (R9)
	ADDL        4(R9), DI
	MOVL        DI, 4(R9)
	ADDL        8(R9), DX
	MOVL        DX, 8(R9)
	ADDL        12(R9), CX
	MOVL        CX, 12(R9)
	ADDL        16(R9), BX
	MOVL        BX, 16(R9)
	MOVL        SI, R12
	MOVL        DI, SI
	MOVL        DX, DI
	MOVL        BX, DX
	MOVL        CX, AX
	MOVL        R12, CX
	XCHGQ       R15, R14
	JMP         loop

// K_XMM_AR is a 128-byte read-only table of the four SHA-1 round constants.
// Each constant fills one 32-byte row (eight identical 32-bit lanes):
//
//	+0   .. +31    0x5a827999  (rounds  0-19)
//	+32  .. +63    0x6ed9eba1  (rounds 20-39)
//	+64  .. +95    0x8f1bbcdc  (rounds 40-59)
//	+96  .. +127   0xca62c1d6  (rounds 60-79)
//
// Every 8-byte DATA entry below stores two adjacent, identical 32-bit lanes.
// NOTE(review): the AVX2 code adds whole 32-byte rows via VPADDD off(R8);
// R8 presumably holds this table's address - confirm at the function setup.

// Row 0: 0x5a827999
DATA K_XMM_AR<>+0(SB)/8, $0x5a8279995a827999
DATA K_XMM_AR<>+8(SB)/8, $0x5a8279995a827999
DATA K_XMM_AR<>+16(SB)/8, $0x5a8279995a827999
DATA K_XMM_AR<>+24(SB)/8, $0x5a8279995a827999

// Row 1: 0x6ed9eba1
DATA K_XMM_AR<>+32(SB)/8, $0x6ed9eba16ed9eba1
DATA K_XMM_AR<>+40(SB)/8, $0x6ed9eba16ed9eba1
DATA K_XMM_AR<>+48(SB)/8, $0x6ed9eba16ed9eba1
DATA K_XMM_AR<>+56(SB)/8, $0x6ed9eba16ed9eba1

// Row 2: 0x8f1bbcdc
DATA K_XMM_AR<>+64(SB)/8, $0x8f1bbcdc8f1bbcdc
DATA K_XMM_AR<>+72(SB)/8, $0x8f1bbcdc8f1bbcdc
DATA K_XMM_AR<>+80(SB)/8, $0x8f1bbcdc8f1bbcdc
DATA K_XMM_AR<>+88(SB)/8, $0x8f1bbcdc8f1bbcdc

// Row 3: 0xca62c1d6
DATA K_XMM_AR<>+96(SB)/8, $0xca62c1d6ca62c1d6
DATA K_XMM_AR<>+104(SB)/8, $0xca62c1d6ca62c1d6
DATA K_XMM_AR<>+112(SB)/8, $0xca62c1d6ca62c1d6
DATA K_XMM_AR<>+120(SB)/8, $0xca62c1d6ca62c1d6
GLOBL K_XMM_AR<>(SB), RODATA, $128

// BSWAP_SHUFB_CTL is a 32-byte read-only table: one 16-byte pattern stored
// twice, once per 128-bit half. In memory order the bytes of each half are
//
//	03 02 01 00  07 06 05 04  0b 0a 09 08  0f 0e 0d 0c
//
// i.e. within every 4-byte group the indices run in descending order. Used as
// a byte-shuffle control, this reverses the byte order of each 32-bit word.
// NOTE(review): the use site is outside this chunk; the name suggests it is
// the VPSHUFB mask for big-endian message loads - confirm.
//
// Each 8-byte DATA entry is little-endian, so the low 32 bits of the
// immediate land at the lower address.

// Low 128-bit half.
DATA BSWAP_SHUFB_CTL<>+0(SB)/8, $0x0405060700010203
DATA BSWAP_SHUFB_CTL<>+8(SB)/8, $0x0c0d0e0f08090a0b

// High 128-bit half (same pattern).
DATA BSWAP_SHUFB_CTL<>+16(SB)/8, $0x0405060700010203
DATA BSWAP_SHUFB_CTL<>+24(SB)/8, $0x0c0d0e0f08090a0b
GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32