// Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// func blockAVX2(dig *digest, p []byte)
// Requires: AVX, AVX2, BMI, BMI2, CMOV
TEXT ·blockAVX2(SB), $1408-32
	MOVQ        dig+0(FP), DI
	MOVQ        p_base+8(FP), SI
	MOVQ        p_len+16(FP), DX
	SHRQ        $0x06, DX
	SHLQ        $0x06, DX
	LEAQ        K_XMM_AR<>+0(SB), R8
	MOVQ        DI, R9
	MOVQ        SI, R10
	LEAQ        64(SI), R13
	ADDQ        SI, DX
	ADDQ        $0x40, DX
	MOVQ        DX, R11
	CMPQ        R13, R11
	CMOVQCC     R8, R13
	VMOVDQU     BSWAP_SHUFB_CTL<>+0(SB), Y10
	MOVL        (R9), CX
	MOVL        4(R9), SI
	MOVL        8(R9), DI
	MOVL        12(R9), AX
	MOVL        16(R9), DX
	MOVQ        SP, R14
	LEAQ        672(SP), R15
	VMOVDQU     (R10), X0
	VINSERTI128 $0x01, (R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y15
	VPADDD      (R8), Y15, Y0
	VMOVDQU     Y0, (R14)
	VMOVDQU     16(R10), X0
	VINSERTI128 $0x01, 16(R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y14
	VPADDD      (R8), Y14, Y0
	VMOVDQU     Y0, 32(R14)
	VMOVDQU     32(R10), X0
	VINSERTI128 $0x01, 32(R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y13
	VPADDD      (R8), Y13, Y0
	VMOVDQU     Y0, 64(R14)
	VMOVDQU     48(R10), X0
	VINSERTI128 $0x01, 48(R13), Y0, Y0
	VPSHUFB     Y10, Y0, Y12
	VPADDD      (R8), Y12, Y0
	VMOVDQU     Y0, 96(R14)
	VPALIGNR    $0x08, Y15, Y14, Y8
	VPSRLDQ     $0x04, Y12, Y0
	VPXOR       Y13, Y8, Y8
	VPXOR       Y15, Y0, Y0
	VPXOR       Y0, Y8, Y8
	VPSLLDQ     $0x0c, Y8, Y9
	VPSLLD      $0x01, Y8, Y0
	VPSRLD      $0x1f, Y8, Y8
	VPOR        Y8, Y0, Y0
	VPSLLD      $0x02, Y9, Y8
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y8, Y0, Y0
	VPXOR       Y9, Y0, Y8
	VPADDD      (R8), Y8, Y0
	VMOVDQU     Y0, 128(R14)
	VPALIGNR    $0x08, Y14, Y13, Y7
	VPSRLDQ     $0x04, Y8, Y0
	VPXOR       Y12, Y7, Y7
	VPXOR       Y14, Y0, Y0
	VPXOR       Y0, Y7, Y7
	VPSLLDQ     $0x0c, Y7, Y9
	VPSLLD      $0x01, Y7, Y0
	VPSRLD      $0x1f, Y7, Y7
	VPOR        Y7, Y0, Y0
	VPSLLD      $0x02, Y9, Y7
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y7, Y0, Y0
	VPXOR       Y9, Y0, Y7
	VPADDD      32(R8), Y7, Y0
	VMOVDQU     Y0, 160(R14)
	VPALIGNR    $0x08, Y13, Y12, Y5
	VPSRLDQ     $0x04, Y7, Y0
	VPXOR       Y8, Y5, Y5
	VPXOR       Y13, Y0, Y0
	VPXOR       Y0, Y5, Y5
	VPSLLDQ     $0x0c, Y5, Y9
	VPSLLD      $0x01, Y5, Y0
	VPSRLD      $0x1f, Y5, Y5
	VPOR        Y5, Y0, Y0
	VPSLLD      $0x02, Y9, Y5
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y5, Y0, Y0
	VPXOR       Y9, Y0, Y5
	VPADDD      32(R8), Y5, Y0
	VMOVDQU     Y0, 192(R14)
	VPALIGNR    $0x08, Y12, Y8, Y3
	VPSRLDQ     $0x04, Y5, Y0
	VPXOR       Y7, Y3, Y3
	VPXOR       Y12, Y0, Y0
	VPXOR       Y0, Y3, Y3
	VPSLLDQ     $0x0c, Y3, Y9
	VPSLLD      $0x01, Y3, Y0
	VPSRLD      $0x1f, Y3, Y3
	VPOR        Y3, Y0, Y0
	VPSLLD      $0x02, Y9, Y3
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y3, Y0, Y0
	VPXOR       Y9, Y0, Y3
	VPADDD      32(R8), Y3, Y0
	VMOVDQU     Y0, 224(R14)
	VPALIGNR    $0x08, Y5, Y3, Y0
	VPXOR       Y14, Y15, Y15
	VPXOR       Y8, Y0, Y0
	VPXOR       Y0, Y15, Y15
	VPSLLD      $0x02, Y15, Y0
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	VPADDD      32(R8), Y15, Y0
	VMOVDQU     Y0, 256(R14)
	VPALIGNR    $0x08, Y3, Y15, Y0
	VPXOR       Y13, Y14, Y14
	VPXOR       Y7, Y0, Y0
	VPXOR       Y0, Y14, Y14
	VPSLLD      $0x02, Y14, Y0
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	VPADDD      32(R8), Y14, Y0
	VMOVDQU     Y0, 288(R14)
	VPALIGNR    $0x08, Y15, Y14, Y0
	VPXOR       Y12, Y13, Y13
	VPXOR       Y5, Y0, Y0
	VPXOR       Y0, Y13, Y13
	VPSLLD      $0x02, Y13, Y0
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	VPADDD      64(R8), Y13, Y0
	VMOVDQU     Y0, 320(R14)
	VPALIGNR    $0x08, Y14, Y13, Y0
	VPXOR       Y8, Y12, Y12
	VPXOR       Y3, Y0, Y0
	VPXOR       Y0, Y12, Y12
	VPSLLD      $0x02, Y12, Y0
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	VPADDD      64(R8), Y12, Y0
	VMOVDQU     Y0, 352(R14)
	VPALIGNR    $0x08, Y13, Y12, Y0
	VPXOR       Y7, Y8, Y8
	VPXOR       Y15, Y0, Y0
	VPXOR       Y0, Y8, Y8
	VPSLLD      $0x02, Y8, Y0
	VPSRLD      $0x1e, Y8, Y8
	VPOR        Y8, Y0, Y8
	VPADDD      64(R8), Y8, Y0
	VMOVDQU     Y0, 384(R14)
	VPALIGNR    $0x08, Y12, Y8, Y0
	VPXOR       Y5, Y7, Y7
	VPXOR       Y14, Y0, Y0
	VPXOR       Y0, Y7, Y7
	VPSLLD      $0x02, Y7, Y0
	VPSRLD      $0x1e, Y7, Y7
	VPOR        Y7, Y0, Y7
	VPADDD      64(R8), Y7, Y0
	VMOVDQU     Y0, 416(R14)
	VPALIGNR    $0x08, Y8, Y7, Y0
	VPXOR       Y3, Y5, Y5
	VPXOR       Y13, Y0, Y0
	VPXOR       Y0, Y5, Y5
	VPSLLD      $0x02, Y5, Y0
	VPSRLD      $0x1e, Y5, Y5
	VPOR        Y5, Y0, Y5
	VPADDD      64(R8), Y5, Y0
	VMOVDQU     Y0, 448(R14)
	VPALIGNR    $0x08, Y7, Y5, Y0
	VPXOR       Y15, Y3, Y3
	VPXOR       Y12, Y0, Y0
	VPXOR       Y0, Y3, Y3
	VPSLLD      $0x02, Y3, Y0
	VPSRLD      $0x1e, Y3, Y3
	VPOR        Y3, Y0, Y3
	VPADDD      96(R8), Y3, Y0
	VMOVDQU     Y0, 480(R14)
	VPALIGNR    $0x08, Y5, Y3, Y0
	VPXOR       Y14, Y15, Y15
	VPXOR       Y8, Y0, Y0
	VPXOR       Y0, Y15, Y15
	VPSLLD      $0x02, Y15, Y0
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	VPADDD      96(R8), Y15, Y0
	VMOVDQU     Y0, 512(R14)
	VPALIGNR    $0x08, Y3, Y15, Y0
	VPXOR       Y13, Y14, Y14
	VPXOR       Y7, Y0, Y0
	VPXOR       Y0, Y14, Y14
	VPSLLD      $0x02, Y14, Y0
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	VPADDD      96(R8), Y14, Y0
	VMOVDQU     Y0, 544(R14)
	VPALIGNR    $0x08, Y15, Y14, Y0
	VPXOR       Y12, Y13, Y13
	VPXOR       Y5, Y0, Y0
	VPXOR       Y0, Y13, Y13
	VPSLLD      $0x02, Y13, Y0
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	VPADDD      96(R8), Y13, Y0
	VMOVDQU     Y0, 576(R14)
	VPALIGNR    $0x08, Y14, Y13, Y0
	VPXOR       Y8, Y12, Y12
	VPXOR       Y3, Y0, Y0
	VPXOR       Y0, Y12, Y12
	VPSLLD      $0x02, Y12, Y0
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	VPADDD      96(R8), Y12, Y0
	VMOVDQU     Y0, 608(R14)
	XCHGQ       R15, R14

loop:
	CMPQ R10, R8
	JNE  begin
	VZEROUPPER
	RET

begin:
	MOVL        SI, BX
	RORXL       $0x02, SI, SI
	ANDNL       AX, BX, BP
	ANDL        DI, BX
	XORL        BP, BX
	ADDL        (R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VMOVDQU     128(R10), X0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        4(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VINSERTI128 $0x01, 128(R13), Y0, Y0
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        8(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSHUFB     Y10, Y0, Y15
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        12(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        32(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPADDD      (R8), Y15, Y0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        36(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        40(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        44(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VMOVDQU     Y0, (R14)
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        64(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VMOVDQU     144(R10), X0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        68(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VINSERTI128 $0x01, 144(R13), Y0, Y0
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        72(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSHUFB     Y10, Y0, Y14
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        76(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        96(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPADDD      (R8), Y14, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        100(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        104(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        108(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VMOVDQU     Y0, 32(R14)
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        128(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VMOVDQU     160(R10), X0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        132(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VINSERTI128 $0x01, 160(R13), Y0, Y0
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        136(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSHUFB     Y10, Y0, Y13
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        140(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        160(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPADDD      (R8), Y13, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        164(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        168(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        172(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VMOVDQU     Y0, 64(R14)
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        192(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VMOVDQU     176(R10), X0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        196(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VINSERTI128 $0x01, 176(R13), Y0, Y0
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        200(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSHUFB     Y10, Y0, Y12
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        204(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        224(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPADDD      (R8), Y12, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        228(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        232(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        236(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VMOVDQU     Y0, 96(R14)
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        256(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y15, Y14, Y8
	VPSRLDQ     $0x04, Y12, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        260(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y13, Y8, Y8
	VPXOR       Y15, Y0, Y0
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        264(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y0, Y8, Y8
	VPSLLDQ     $0x0c, Y8, Y9
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        268(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSLLD      $0x01, Y8, Y0
	VPSRLD      $0x1f, Y8, Y8
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        288(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPOR        Y8, Y0, Y0
	VPSLLD      $0x02, Y9, Y8
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        292(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y8, Y0, Y0
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        296(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        300(R15), SI
	VPXOR       Y9, Y0, Y8
	VPADDD      (R8), Y8, Y0
	VMOVDQU     Y0, 128(R14)
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        320(R15), BX
	VPALIGNR    $0x08, Y14, Y13, Y7
	VPSRLDQ     $0x04, Y8, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        324(R15), CX
	VPXOR       Y12, Y7, Y7
	VPXOR       Y14, Y0, Y0
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        328(R15), DX
	VPXOR       Y0, Y7, Y7
	VPSLLDQ     $0x0c, Y7, Y9
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        332(R15), AX
	VPSLLD      $0x01, Y7, Y0
	VPSRLD      $0x1f, Y7, Y7
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        352(R15), DI
	VPOR        Y7, Y0, Y0
	VPSLLD      $0x02, Y9, Y7
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        356(R15), SI
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y7, Y0, Y0
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        360(R15), BX
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        364(R15), CX
	VPXOR       Y9, Y0, Y7
	VPADDD      32(R8), Y7, Y0
	VMOVDQU     Y0, 160(R14)
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        384(R15), DX
	VPALIGNR    $0x08, Y13, Y12, Y5
	VPSRLDQ     $0x04, Y7, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        388(R15), AX
	VPXOR       Y8, Y5, Y5
	VPXOR       Y13, Y0, Y0
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        392(R15), DI
	VPXOR       Y0, Y5, Y5
	VPSLLDQ     $0x0c, Y5, Y9
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        396(R15), SI
	VPSLLD      $0x01, Y5, Y0
	VPSRLD      $0x1f, Y5, Y5
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        416(R15), BX
	VPOR        Y5, Y0, Y0
	VPSLLD      $0x02, Y9, Y5
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        420(R15), CX
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y5, Y0, Y0
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        424(R15), DX
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        428(R15), AX
	VPXOR       Y9, Y0, Y5
	VPADDD      32(R8), Y5, Y0
	VMOVDQU     Y0, 192(R14)
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        448(R15), DI
	VPALIGNR    $0x08, Y12, Y8, Y3
	VPSRLDQ     $0x04, Y5, Y0
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        452(R15), SI
	VPXOR       Y7, Y3, Y3
	VPXOR       Y12, Y0, Y0
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        456(R15), BX
	VPXOR       Y0, Y3, Y3
	VPSLLDQ     $0x0c, Y3, Y9
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        460(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSLLD      $0x01, Y3, Y0
	VPSRLD      $0x1f, Y3, Y3
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDQ        $0x80, R10
	CMPQ        R10, R11
	CMOVQCC     R8, R10
	ADDL        480(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPOR        Y3, Y0, Y0
	VPSLLD      $0x02, Y9, Y3
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        484(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y9, Y9
	VPXOR       Y3, Y0, Y0
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        488(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        492(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y9, Y0, Y3
	VPADDD      32(R8), Y3, Y0
	VMOVDQU     Y0, 224(R14)
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        512(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPALIGNR    $0x08, Y5, Y3, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        516(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y14, Y15, Y15
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        520(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPXOR       Y8, Y0, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        524(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y15, Y15
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        544(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y15, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        548(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        552(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        556(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPADDD      32(R8), Y15, Y0
	VMOVDQU     Y0, 256(R14)
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        576(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPALIGNR    $0x08, Y3, Y15, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        580(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y13, Y14, Y14
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        584(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPXOR       Y7, Y0, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        588(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y0, Y14, Y14
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        608(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSLLD      $0x02, Y14, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        612(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        616(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        620(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	VPADDD      32(R8), Y14, Y0
	VMOVDQU     Y0, 288(R14)
	ADDL        R12, AX
	ADDL        (R9), AX
	MOVL        AX, (R9)
	ADDL        4(R9), DX
	MOVL        DX, 4(R9)
	ADDL        8(R9), BX
	MOVL        BX, 8(R9)
	ADDL        12(R9), SI
	MOVL        SI, 12(R9)
	ADDL        16(R9), DI
	MOVL        DI, 16(R9)
	CMPQ        R10, R8
	JE          loop
	MOVL        DX, CX
	MOVL        CX, DX
	RORXL       $0x02, CX, CX
	ANDNL       SI, DX, BP
	ANDL        BX, DX
	XORL        BP, DX
	ADDL        16(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y15, Y14, Y0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        20(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y12, Y13, Y13
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        24(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y5, Y0, Y0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        28(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y0, Y13, Y13
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        48(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSLLD      $0x02, Y13, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        52(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        56(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        60(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPADDD      64(R8), Y13, Y0
	VMOVDQU     Y0, 320(R14)
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        80(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPALIGNR    $0x08, Y14, Y13, Y0
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        84(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y8, Y12, Y12
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        88(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPXOR       Y3, Y0, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        92(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y12, Y12
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        112(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y12, Y0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        116(R15), SI
	ANDNL       CX, DI, BP
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	ANDL        DX, DI
	XORL        BP, DI
	LEAL        (SI)(R12*1), SI
	ADDL        120(R15), BX
	ANDNL       DX, SI, BP
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        AX, SI
	XORL        BP, SI
	LEAL        (BX)(R12*1), BX
	ADDL        124(R15), CX
	ANDNL       AX, BX, BP
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPADDD      64(R8), Y12, Y0
	VMOVDQU     Y0, 352(R14)
	ANDL        DI, BX
	XORL        BP, BX
	LEAL        (CX)(R12*1), CX
	ADDL        144(R15), DX
	ANDNL       DI, CX, BP
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPALIGNR    $0x08, Y13, Y12, Y0
	ANDL        SI, CX
	XORL        BP, CX
	LEAL        (DX)(R12*1), DX
	ADDL        148(R15), AX
	ANDNL       SI, DX, BP
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y7, Y8, Y8
	ANDL        BX, DX
	XORL        BP, DX
	LEAL        (AX)(R12*1), AX
	ADDL        152(R15), DI
	ANDNL       BX, AX, BP
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPXOR       Y15, Y0, Y0
	ANDL        CX, AX
	XORL        BP, AX
	LEAL        (DI)(R12*1), DI
	ADDL        156(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y0, Y8, Y8
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        176(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSLLD      $0x02, Y8, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        180(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSRLD      $0x1e, Y8, Y8
	VPOR        Y8, Y0, Y8
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        184(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        188(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPADDD      64(R8), Y8, Y0
	VMOVDQU     Y0, 384(R14)
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        208(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y12, Y8, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        212(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y5, Y7, Y7
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        216(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y14, Y0, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        220(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y0, Y7, Y7
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        240(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSLLD      $0x02, Y7, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        244(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y7, Y7
	VPOR        Y7, Y0, Y7
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        248(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        252(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPADDD      64(R8), Y7, Y0
	VMOVDQU     Y0, 416(R14)
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        272(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPALIGNR    $0x08, Y8, Y7, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        276(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y3, Y5, Y5
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        280(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPXOR       Y13, Y0, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        284(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y5, Y5
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        304(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y5, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        308(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y5, Y5
	VPOR        Y5, Y0, Y5
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        312(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        316(R15), CX
	VPADDD      64(R8), Y5, Y0
	VMOVDQU     Y0, 448(R14)
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        336(R15), DX
	VPALIGNR    $0x08, Y7, Y5, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        340(R15), AX
	VPXOR       Y15, Y3, Y3
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        344(R15), DI
	VPXOR       Y12, Y0, Y0
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        348(R15), SI
	VPXOR       Y0, Y3, Y3
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        368(R15), BX
	VPSLLD      $0x02, Y3, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        372(R15), CX
	VPSRLD      $0x1e, Y3, Y3
	VPOR        Y3, Y0, Y3
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        376(R15), DX
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        380(R15), AX
	VPADDD      96(R8), Y3, Y0
	VMOVDQU     Y0, 480(R14)
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        400(R15), DI
	VPALIGNR    $0x08, Y5, Y3, Y0
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        404(R15), SI
	VPXOR       Y14, Y15, Y15
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        408(R15), BX
	VPXOR       Y8, Y0, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        412(R15), CX
	VPXOR       Y0, Y15, Y15
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        432(R15), DX
	VPSLLD      $0x02, Y15, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        436(R15), AX
	VPSRLD      $0x1e, Y15, Y15
	VPOR        Y15, Y0, Y15
	LEAL        (AX)(CX*1), AX
	MOVL        BX, BP
	ORL         DX, BP
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	ANDL        SI, BP
	ANDL        BX, DX
	ORL         BP, DX
	ADDL        R12, AX
	ADDL        440(R15), DI
	LEAL        (DI)(DX*1), DI
	MOVL        CX, BP
	ORL         AX, BP
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	ANDL        BX, BP
	ANDL        CX, AX
	ORL         BP, AX
	ADDL        R12, DI
	ADDL        444(R15), SI
	VPADDD      96(R8), Y15, Y0
	VMOVDQU     Y0, 512(R14)
	LEAL        (SI)(AX*1), SI
	MOVL        DX, BP
	ORL         DI, BP
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	ANDL        CX, BP
	ANDL        DX, DI
	ORL         BP, DI
	ADDL        R12, SI
	ADDL        464(R15), BX
	VPALIGNR    $0x08, Y3, Y15, Y0
	LEAL        (BX)(DI*1), BX
	MOVL        AX, BP
	ORL         SI, BP
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	ANDL        DX, BP
	ANDL        AX, SI
	ORL         BP, SI
	ADDL        R12, BX
	ADDL        468(R15), CX
	VPXOR       Y13, Y14, Y14
	LEAL        (CX)(SI*1), CX
	MOVL        DI, BP
	ORL         BX, BP
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	ANDL        AX, BP
	ANDL        DI, BX
	ORL         BP, BX
	ADDL        R12, CX
	ADDL        472(R15), DX
	VPXOR       Y7, Y0, Y0
	LEAL        (DX)(BX*1), DX
	MOVL        SI, BP
	ORL         CX, BP
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	ANDL        DI, BP
	ANDL        SI, CX
	ORL         BP, CX
	ADDL        R12, DX
	ADDL        476(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y0, Y14, Y14
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDQ        $0x80, R13
	CMPQ        R13, R11
	CMOVQCC     R8, R10
	ADDL        496(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPSLLD      $0x02, Y14, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        500(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPSRLD      $0x1e, Y14, Y14
	VPOR        Y14, Y0, Y14
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        504(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        508(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPADDD      96(R8), Y14, Y0
	VMOVDQU     Y0, 544(R14)
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        528(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPALIGNR    $0x08, Y15, Y14, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        532(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPXOR       Y12, Y13, Y13
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        536(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPXOR       Y5, Y0, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        540(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y0, Y13, Y13
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        560(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPSLLD      $0x02, Y13, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        564(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPSRLD      $0x1e, Y13, Y13
	VPOR        Y13, Y0, Y13
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        568(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        572(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPADDD      96(R8), Y13, Y0
	VMOVDQU     Y0, 576(R14)
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        592(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	VPALIGNR    $0x08, Y14, Y13, Y0
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        596(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	RORXL       $0x02, DI, AX
	VPXOR       Y8, Y12, Y12
	XORL        DX, DI
	ADDL        R12, SI
	XORL        CX, DI
	ADDL        600(R15), BX
	LEAL        (BX)(DI*1), BX
	RORXL       $0x1b, SI, R12
	RORXL       $0x02, SI, DI
	VPXOR       Y3, Y0, Y0
	XORL        AX, SI
	ADDL        R12, BX
	XORL        DX, SI
	ADDL        604(R15), CX
	LEAL        (CX)(SI*1), CX
	RORXL       $0x1b, BX, R12
	RORXL       $0x02, BX, SI
	VPXOR       Y0, Y12, Y12
	XORL        DI, BX
	ADDL        R12, CX
	XORL        AX, BX
	ADDL        624(R15), DX
	LEAL        (DX)(BX*1), DX
	RORXL       $0x1b, CX, R12
	RORXL       $0x02, CX, BX
	VPSLLD      $0x02, Y12, Y0
	XORL        SI, CX
	ADDL        R12, DX
	XORL        DI, CX
	ADDL        628(R15), AX
	LEAL        (AX)(CX*1), AX
	RORXL       $0x1b, DX, R12
	RORXL       $0x02, DX, CX
	VPSRLD      $0x1e, Y12, Y12
	VPOR        Y12, Y0, Y12
	XORL        BX, DX
	ADDL        R12, AX
	XORL        SI, DX
	ADDL        632(R15), DI
	LEAL        (DI)(DX*1), DI
	RORXL       $0x1b, AX, R12
	RORXL       $0x02, AX, DX
	XORL        CX, AX
	ADDL        R12, DI
	XORL        BX, AX
	ADDL        636(R15), SI
	LEAL        (SI)(AX*1), SI
	RORXL       $0x1b, DI, R12
	VPADDD      96(R8), Y12, Y0
	VMOVDQU     Y0, 608(R14)
	ADDL        R12, SI
	ADDL        (R9), SI
	MOVL        SI, (R9)
	ADDL        4(R9), DI
	MOVL        DI, 4(R9)
	ADDL        8(R9), DX
	MOVL        DX, 8(R9)
	ADDL        12(R9), CX
	MOVL        CX, 12(R9)
	ADDL        16(R9), BX
	MOVL        BX, 16(R9)
	MOVL        SI, R12
	MOVL        DI, SI
	MOVL        DX, DI
	MOVL        BX, DX
	MOVL        CX, AX
	MOVL        R12, CX
	XCHGQ       R15, R14
	JMP         loop

// K_XMM_AR<> is a 128-byte read-only table of the four SHA-1 round constants
// (FIPS 180-4). Each constant is replicated across eight consecutive 32-bit
// slots, i.e. one full 32-byte group per constant, so that a single 256-bit
// VPADDD against the table adds the same constant to every lane. blockAVX2
// keeps the table base in R8 and selects a group by byte offset.

// Bytes 0-31: 0x5a827999 (rounds 0-19).
DATA K_XMM_AR<>+0(SB)/4, $0x5a827999
DATA K_XMM_AR<>+4(SB)/4, $0x5a827999
DATA K_XMM_AR<>+8(SB)/4, $0x5a827999
DATA K_XMM_AR<>+12(SB)/4, $0x5a827999
DATA K_XMM_AR<>+16(SB)/4, $0x5a827999
DATA K_XMM_AR<>+20(SB)/4, $0x5a827999
DATA K_XMM_AR<>+24(SB)/4, $0x5a827999
DATA K_XMM_AR<>+28(SB)/4, $0x5a827999
// Bytes 32-63: 0x6ed9eba1 (rounds 20-39).
DATA K_XMM_AR<>+32(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+36(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+40(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+44(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+48(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+52(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+56(SB)/4, $0x6ed9eba1
DATA K_XMM_AR<>+60(SB)/4, $0x6ed9eba1
// Bytes 64-95: 0x8f1bbcdc (rounds 40-59).
DATA K_XMM_AR<>+64(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+68(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+72(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+76(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+80(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+84(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+88(SB)/4, $0x8f1bbcdc
DATA K_XMM_AR<>+92(SB)/4, $0x8f1bbcdc
// Bytes 96-127: 0xca62c1d6 (rounds 60-79).
DATA K_XMM_AR<>+96(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+100(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+104(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+108(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+112(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+116(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+120(SB)/4, $0xca62c1d6
DATA K_XMM_AR<>+124(SB)/4, $0xca62c1d6
GLOBL K_XMM_AR<>(SB), RODATA, $128

// BSWAP_SHUFB_CTL<> is the 32-byte VPSHUFB control vector that blockAVX2
// loads into Y10 and applies to each freshly loaded chunk of input. Every
// 32-bit entry is stored little-endian, so the in-memory index bytes of each
// dword run 3,2,1,0 / 7,6,5,4 / 11,10,9,8 / 15,14,13,12: the shuffle reverses
// the bytes inside each 32-bit word while leaving the word order unchanged.
// The same 16-byte pattern is repeated for the upper 128-bit lane.
DATA BSWAP_SHUFB_CTL<>+0(SB)/4, $0x00010203
DATA BSWAP_SHUFB_CTL<>+4(SB)/4, $0x04050607
DATA BSWAP_SHUFB_CTL<>+8(SB)/4, $0x08090a0b
DATA BSWAP_SHUFB_CTL<>+12(SB)/4, $0x0c0d0e0f
DATA BSWAP_SHUFB_CTL<>+16(SB)/4, $0x00010203
DATA BSWAP_SHUFB_CTL<>+20(SB)/4, $0x04050607
DATA BSWAP_SHUFB_CTL<>+24(SB)/4, $0x08090a0b
DATA BSWAP_SHUFB_CTL<>+28(SB)/4, $0x0c0d0e0f
GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32

// func blockSHANI(dig *digest, p []byte)
// Requires: AVX, SHA, SSE2, SSE4.1, SSSE3
//
// blockSHANI folds every complete 64-byte block of p into the five 32-bit
// state words stored at dig (bytes 0-19), using the SHA1* instructions.
// Trailing bytes that do not form a complete block (len(p) % 64) are ignored,
// matching the length rounding done by blockAVX2; if p holds no complete
// block, dig is left untouched.
//
// Register roles:
//   DI      dig
//   SI      read cursor into p, advanced by 64 per iteration
//   DX      end pointer: p_base + (p_len rounded down to a multiple of 64)
//   AX      16-byte-aligned, 32-byte save area inside the 48-byte frame
//   X0      working ABCD
//   X5, X6  working E, swapped every four rounds
//   X1-X4   message schedule words
//   X7      PSHUFB byte-reversal control (shuffle_mask<>)
TEXT ·blockSHANI(SB), $48-32
	MOVQ dig+0(FP), DI
	MOVQ p_base+8(FP), SI
	MOVQ p_len+16(FP), DX

	// Keep only whole 64-byte blocks. The loop below exits on SI == DX, so an
	// unrounded length would make it run past the end of p. ANDQ sets ZF when
	// no complete block remains, which also covers the empty-input case.
	ANDQ $~63, DX
	JEQ  done
	ADDQ SI, DX

	// Carve a 16-byte-aligned, 32-byte save area for E and ABCD out of the
	// frame (48 bytes = 32 bytes of storage + up to 15 bytes of alignment slack)
	LEAQ 15(SP), AX
	ANDQ $~15, AX

	// Load initial hash state: E goes into the top dword of X5 with the other
	// three dwords cleared by upper_mask; the four dwords at dig+0 are loaded
	// into X0 and reversed in order by PSHUFD $0x1b
	PINSRD  $0x03, 16(DI), X5
	VMOVDQU (DI), X0
	PAND    upper_mask<>+0(SB), X5
	PSHUFD  $0x1b, X0, X0
	VMOVDQA shuffle_mask<>+0(SB), X7

loop:
	// Save ABCD and E working values
	VMOVDQA X5, (AX)
	VMOVDQA X0, 16(AX)

	// Rounds 0-3
	VMOVDQU   (SI), X1
	PSHUFB    X7, X1
	PADDD     X1, X5
	VMOVDQA   X0, X6
	SHA1RNDS4 $0x00, X5, X0

	// Rounds 4-7
	VMOVDQU   16(SI), X2
	PSHUFB    X7, X2
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1RNDS4 $0x00, X6, X0
	SHA1MSG1  X2, X1

	// Rounds 8-11
	VMOVDQU   32(SI), X3
	PSHUFB    X7, X3
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1RNDS4 $0x00, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1

	// Rounds 12-15
	VMOVDQU   48(SI), X4
	PSHUFB    X7, X4
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x00, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2

	// Rounds 16-19
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x00, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3

	// Rounds 20-23 (SHA1RNDS4 immediate switches to $0x01 for rounds 20-39)
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x01, X6, X0
	SHA1MSG1  X2, X1
	PXOR      X2, X4

	// Rounds 24-27
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x01, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1

	// Rounds 28-31
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x01, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2

	// Rounds 32-35
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x01, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3

	// Rounds 36-39
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x01, X6, X0
	SHA1MSG1  X2, X1
	PXOR      X2, X4

	// Rounds 40-43 (SHA1RNDS4 immediate switches to $0x02 for rounds 40-59)
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x02, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1

	// Rounds 44-47
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x02, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2

	// Rounds 48-51
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x02, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3

	// Rounds 52-55
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x02, X6, X0
	SHA1MSG1  X2, X1
	PXOR      X2, X4

	// Rounds 56-59
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x02, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1

	// Rounds 60-63 (SHA1RNDS4 immediate switches to $0x03 for rounds 60-79)
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x03, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2

	// Rounds 64-67
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x03, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3

	// Rounds 68-71 (the schedule winds down: no further SHA1MSG1 from here on)
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x03, X6, X0
	PXOR      X2, X4

	// Rounds 72-75
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x03, X5, X0

	// Rounds 76-79
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1RNDS4 $0x03, X6, X0

	// Add saved E and ABCD
	SHA1NEXTE (AX), X5
	PADDD     16(AX), X0

	// Advance to the next block; DX is a whole number of blocks past p_base,
	// so the equality test is an exact end-of-input check
	ADDQ $0x40, SI
	CMPQ SI, DX
	JNE  loop

	// Write the hash state back to digest (undo the dword reversal first)
	PSHUFD  $0x1b, X0, X0
	VMOVDQU X0, (DI)
	PEXTRD  $0x03, X5, 16(DI)

done:
	RET

// upper_mask<> is a 16-byte read-only AND mask in which only the most
// significant 32 bits (dword 3) are set. blockSHANI applies it with PAND to
// X5 right after PINSRD $0x03, so that the inserted dword is kept and the
// other three dwords of the register are cleared.
DATA upper_mask<>+0(SB)/8, $0x0000000000000000
DATA upper_mask<>+8(SB)/8, $0xffffffff00000000
GLOBL upper_mask<>(SB), RODATA, $16

// shuffle_mask<> is the 16-byte PSHUFB control that blockSHANI keeps in X7
// and applies to every 16-byte message load. The two quadwords are stored
// little-endian, so the in-memory index bytes run 15,14,...,1,0: the shuffle
// reverses all 16 bytes of the register.
DATA shuffle_mask<>+0(SB)/8, $0x08090a0b0c0d0e0f
DATA shuffle_mask<>+8(SB)/8, $0x0001020304050607
GLOBL shuffle_mask<>(SB), RODATA, $16