// Code generated by command: go run sha512block_amd64_asm.go -out ../sha512block_amd64.s. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
// func blockAVX2(dig *Digest, p []byte)
// Requires: AVX, AVX2, BMI2
//
// Consumes p in 128-byte blocks and updates the eight 64-bit words at
// dig+0..dig+56 in place. A trailing partial block (len(p) % 128) is ignored.
// NOTE(review): Digest is declared outside this file; this code assumes its
// first 64 bytes are the eight state words -- confirm against the Go definition.
//
// Frame ($56 bytes of locals, 32 bytes of arguments):
//   0(SP)..31(SP)  four 64-bit words stored from Y0, consumed one per round
//   32(SP)         iteration counter for loop1 and loop2
//   40(SP)         saved input pointer (DI is reused as scratch in the rounds)
//   48(SP)         end-of-input pointer
TEXT ·blockAVX2(SB), NOSPLIT, $56-32
MOVQ dig+0(FP), SI
MOVQ p_base+8(FP), DI
MOVQ p_len+16(FP), DX
// Round the length down to a multiple of 128; SHLQ sets ZF when the result is
// zero, in which case there is no full block to process.
SHRQ $0x07, DX
SHLQ $0x07, DX
JZ done_hash
// DX = p_base + rounded length. It is kept on the stack because DX is reused
// as a working register below.
ADDQ DI, DX
MOVQ DX, 48(SP)
// Load the eight state words into the working registers. In the round code
// they play the roles a..h in this order: AX, BX, CX, R8, DX, R9, R10, R11.
MOVQ (SI), AX
MOVQ 8(SI), BX
MOVQ 16(SI), CX
MOVQ 24(SI), R8
MOVQ 32(SI), DX
MOVQ 40(SI), R9
MOVQ 48(SI), R10
MOVQ 56(SI), R11
// Y9 = VPSHUFB control that reverses the byte order of every 64-bit word.
VMOVDQU PSHUFFLE_BYTE_FLIP_MASK<>+0(SB), Y9
// loop0 runs once per 128-byte input block; DI points at the current block.
loop0:
// BP = address of ·_K. The symbol is defined outside this file; presumably it
// is the table of 80 64-bit round constants -- confirm against its Go definition.
MOVQ $·_K+0(SB), BP
// Load the block as sixteen 64-bit words into Y4..Y7 (four words each) and
// byte-reverse every word with the shuffle control held in Y9.
VMOVDQU (DI), Y4
VPSHUFB Y9, Y4, Y4
VMOVDQU 32(DI), Y5
VPSHUFB Y9, Y5, Y5
VMOVDQU 64(DI), Y6
VPSHUFB Y9, Y6, Y6
VMOVDQU 96(DI), Y7
VPSHUFB Y9, Y7, Y7
// DI is used as a scratch register by the rounds, so park the input pointer.
MOVQ DI, 40(SP)
// loop1 executes four times.
MOVQ $0x00000004, 32(SP)
// loop1: each iteration performs 16 rounds and, interleaved with them, derives
// the next 16 message-schedule words. It runs 4 times (counter at 32(SP)), so
// it covers 64 rounds and advances BP by 128 bytes per iteration.
//
// Scalar rounds: the eight state registers AX, BX, CX, R8, DX, R9, R10, R11 are
// never moved; instead the roles a..h rotate by one register every round and
// return to the starting assignment after 8 rounds. DI, R12, R13, R14, R15 are
// scratch. Each round computes
//   R13 = ROTR(e,41) ^ ROTR(e,18) ^ ROTR(e,14)
//   R15 = ((f ^ g) & e) ^ g, then R15 += R13
//   R14 = ROTR(a,39) ^ ROTR(a,34) ^ ROTR(a,28)
//   DI  = ((a | c) & b) | (a & c)
//   h  += word from the stack;  d += h + R15;  h += R14 + R15 + DI
// after which the register that held h holds the new a, and d holds the new e.
//
// Vector schedule: Y4..Y7 hold the 16 most recent schedule words. Every group
// of four rounds replaces the oldest register with four new words
//   W[t] = W[t-16] + sigma0(W[t-15]) + W[t-7] + sigma1(W[t-2])
//   sigma0(x) = ROTR(x,1) ^ ROTR(x,8) ^ (x >> 7)
//   sigma1(x) = ROTR(x,19) ^ ROTR(x,61) ^ (x >> 6)
// Rotations are built from a VPSRLQ/VPSLLQ pair combined with VPOR.
loop1:
// ---- Rounds 0-3 of this iteration; oldest words are in Y4 ----
// Y0 = Y4 + table words at 0(BP); spill to 0(SP)..31(SP) for the four rounds.
VPADDQ (BP), Y4, Y0
VMOVDQU Y0, (SP)
// Y0 = W[t-7] + W[t-16]: W[t-7] is assembled from the high half of Y6 and the
// low half of Y7, shifted by one 64-bit word.
VPERM2F128 $0x03, Y6, Y7, Y0
VPALIGNR $0x08, Y6, Y0, Y0
VPADDQ Y4, Y0, Y0
// Y1 = W[t-15], assembled the same way from Y4 and Y5.
VPERM2F128 $0x03, Y4, Y5, Y1
VPALIGNR $0x08, Y4, Y1, Y1
// Start sigma0(Y1): Y3 = ROTR(Y1,1), Y8 = Y1 >> 7.
VPSRLQ $0x01, Y1, Y2
VPSLLQ $0x3f, Y1, Y3
VPOR Y2, Y3, Y3
VPSRLQ $0x07, Y1, Y8
// Round with word 0(SP): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
MOVQ AX, DI
RORXQ $0x29, DX, R13
RORXQ $0x12, DX, R14
ADDQ (SP), R11
ORQ CX, DI
MOVQ R9, R15
RORXQ $0x22, AX, R12
XORQ R14, R13
XORQ R10, R15
RORXQ $0x0e, DX, R14
ANDQ DX, R15
XORQ R14, R13
RORXQ $0x27, AX, R14
ADDQ R11, R8
ANDQ BX, DI
XORQ R12, R14
RORXQ $0x1c, AX, R12
XORQ R10, R15
XORQ R12, R14
MOVQ AX, R12
ANDQ CX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R11
ADDQ R15, R8
ADDQ R15, R11
ADDQ DI, R11
// Finish sigma0: Y1 = ROTR(Y1,8) ^ ROTR(Y1,1) ^ (Y1 >> 7); add it into Y0.
VPSRLQ $0x08, Y1, Y2
VPSLLQ $0x38, Y1, Y1
VPOR Y2, Y1, Y1
VPXOR Y8, Y3, Y3
VPXOR Y1, Y3, Y1
VPADDQ Y1, Y0, Y0
// Y4 = low 128 bits of Y0 copied into both halves; Y0 keeps only its high
// 128 bits (the mask zeroes the low half).
VPERM2F128 $0x00, Y0, Y0, Y4
VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
// Y2 = high 128 bits of Y7 (the two newest words) in both halves; start
// sigma1(Y2) with Y8 = Y2 >> 6.
VPERM2F128 $0x11, Y7, Y7, Y2
VPSRLQ $0x06, Y2, Y8
// Round with word 8(SP): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
MOVQ R11, DI
RORXQ $0x29, R8, R13
RORXQ $0x12, R8, R14
ADDQ 8(SP), R10
ORQ BX, DI
MOVQ DX, R15
RORXQ $0x22, R11, R12
XORQ R14, R13
XORQ R9, R15
RORXQ $0x0e, R8, R14
XORQ R14, R13
RORXQ $0x27, R11, R14
ANDQ R8, R15
ADDQ R10, CX
ANDQ AX, DI
XORQ R12, R14
RORXQ $0x1c, R11, R12
XORQ R9, R15
XORQ R12, R14
MOVQ R11, R12
ANDQ BX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R10
ADDQ R15, CX
ADDQ R15, R10
ADDQ DI, R10
// Y8 = sigma1(Y2) = (Y2 >> 6) ^ ROTR(Y2,19) ^ ROTR(Y2,61); adding it to Y4
// completes the first two new words (present in both halves of Y4).
VPSRLQ $0x13, Y2, Y3
VPSLLQ $0x2d, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y2, Y3
VPSLLQ $0x03, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y4, Y4
// Start sigma1 of the two words just produced.
VPSRLQ $0x06, Y4, Y8
// Round with word 16(SP): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
MOVQ R10, DI
RORXQ $0x29, CX, R13
ADDQ 16(SP), R9
RORXQ $0x12, CX, R14
ORQ AX, DI
MOVQ R8, R15
XORQ DX, R15
RORXQ $0x22, R10, R12
XORQ R14, R13
ANDQ CX, R15
RORXQ $0x0e, CX, R14
ADDQ R9, BX
ANDQ R11, DI
XORQ R14, R13
RORXQ $0x27, R10, R14
XORQ DX, R15
XORQ R12, R14
RORXQ $0x1c, R10, R12
XORQ R12, R14
MOVQ R10, R12
ANDQ AX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R9
ADDQ R15, BX
ADDQ R15, R9
ADDQ DI, R9
// Y8 = sigma1(Y4); Y2 = Y0 + Y8 gives the last two new words in its high
// half. The blend takes the high 128 bits from Y2 and the low 128 bits from
// Y4, leaving all four new words in Y4.
VPSRLQ $0x13, Y4, Y3
VPSLLQ $0x2d, Y4, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y4, Y3
VPSLLQ $0x03, Y4, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y0, Y2
VPBLENDD $0xf0, Y2, Y4, Y4
// Round with word 24(SP): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
MOVQ R9, DI
RORXQ $0x29, BX, R13
RORXQ $0x12, BX, R14
ADDQ 24(SP), DX
ORQ R11, DI
MOVQ CX, R15
RORXQ $0x22, R9, R12
XORQ R14, R13
XORQ R8, R15
RORXQ $0x0e, BX, R14
ANDQ BX, R15
ADDQ DX, AX
ANDQ R10, DI
XORQ R14, R13
XORQ R8, R15
RORXQ $0x27, R9, R14
ADDQ R13, R15
XORQ R12, R14
ADDQ R15, AX
RORXQ $0x1c, R9, R12
XORQ R12, R14
MOVQ R9, R12
ANDQ R11, R12
ORQ R12, DI
ADDQ R14, DX
ADDQ R15, DX
ADDQ DI, DX
// ---- Rounds 4-7; oldest words are now in Y5, newest in Y4 ----
// Same steps as above with the registers rotated: Y5 is replaced, W[t-7]
// comes from Y7/Y4, W[t-15] from Y5/Y6, and sigma1 starts from the high half
// of Y4.
VPADDQ 32(BP), Y5, Y0
VMOVDQU Y0, (SP)
VPERM2F128 $0x03, Y7, Y4, Y0
VPALIGNR $0x08, Y7, Y0, Y0
VPADDQ Y5, Y0, Y0
VPERM2F128 $0x03, Y5, Y6, Y1
VPALIGNR $0x08, Y5, Y1, Y1
VPSRLQ $0x01, Y1, Y2
VPSLLQ $0x3f, Y1, Y3
VPOR Y2, Y3, Y3
VPSRLQ $0x07, Y1, Y8
// Round with word 0(SP): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
MOVQ DX, DI
RORXQ $0x29, AX, R13
RORXQ $0x12, AX, R14
ADDQ (SP), R8
ORQ R10, DI
MOVQ BX, R15
RORXQ $0x22, DX, R12
XORQ R14, R13
XORQ CX, R15
RORXQ $0x0e, AX, R14
ANDQ AX, R15
XORQ R14, R13
RORXQ $0x27, DX, R14
ADDQ R8, R11
ANDQ R9, DI
XORQ R12, R14
RORXQ $0x1c, DX, R12
XORQ CX, R15
XORQ R12, R14
MOVQ DX, R12
ANDQ R10, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R8
ADDQ R15, R11
ADDQ R15, R8
ADDQ DI, R8
// Finish sigma0, add into Y0, split Y0 into Y5 (low half duplicated) and
// Y0 (high half only), and start sigma1 on the high half of Y4.
VPSRLQ $0x08, Y1, Y2
VPSLLQ $0x38, Y1, Y1
VPOR Y2, Y1, Y1
VPXOR Y8, Y3, Y3
VPXOR Y1, Y3, Y1
VPADDQ Y1, Y0, Y0
VPERM2F128 $0x00, Y0, Y0, Y5
VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y4, Y4, Y2
VPSRLQ $0x06, Y2, Y8
// Round with word 8(SP): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
MOVQ R8, DI
RORXQ $0x29, R11, R13
RORXQ $0x12, R11, R14
ADDQ 8(SP), CX
ORQ R9, DI
MOVQ AX, R15
RORXQ $0x22, R8, R12
XORQ R14, R13
XORQ BX, R15
RORXQ $0x0e, R11, R14
XORQ R14, R13
RORXQ $0x27, R8, R14
ANDQ R11, R15
ADDQ CX, R10
ANDQ DX, DI
XORQ R12, R14
RORXQ $0x1c, R8, R12
XORQ BX, R15
XORQ R12, R14
MOVQ R8, R12
ANDQ R9, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, CX
ADDQ R15, R10
ADDQ R15, CX
ADDQ DI, CX
// sigma1(Y2) completes the first two new words in Y5; start sigma1 on them.
VPSRLQ $0x13, Y2, Y3
VPSLLQ $0x2d, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y2, Y3
VPSLLQ $0x03, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y5, Y5
VPSRLQ $0x06, Y5, Y8
// Round with word 16(SP): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
MOVQ CX, DI
RORXQ $0x29, R10, R13
ADDQ 16(SP), BX
RORXQ $0x12, R10, R14
ORQ DX, DI
MOVQ R11, R15
XORQ AX, R15
RORXQ $0x22, CX, R12
XORQ R14, R13
ANDQ R10, R15
RORXQ $0x0e, R10, R14
ADDQ BX, R9
ANDQ R8, DI
XORQ R14, R13
RORXQ $0x27, CX, R14
XORQ AX, R15
XORQ R12, R14
RORXQ $0x1c, CX, R12
XORQ R12, R14
MOVQ CX, R12
ANDQ DX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, BX
ADDQ R15, R9
ADDQ R15, BX
ADDQ DI, BX
// sigma1(Y5) yields the last two new words; blend them into the high half
// of Y5.
VPSRLQ $0x13, Y5, Y3
VPSLLQ $0x2d, Y5, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y5, Y3
VPSLLQ $0x03, Y5, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y0, Y2
VPBLENDD $0xf0, Y2, Y5, Y5
// Round with word 24(SP): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
MOVQ BX, DI
RORXQ $0x29, R9, R13
RORXQ $0x12, R9, R14
ADDQ 24(SP), AX
ORQ R8, DI
MOVQ R10, R15
RORXQ $0x22, BX, R12
XORQ R14, R13
XORQ R11, R15
RORXQ $0x0e, R9, R14
ANDQ R9, R15
ADDQ AX, DX
ANDQ CX, DI
XORQ R14, R13
XORQ R11, R15
RORXQ $0x27, BX, R14
ADDQ R13, R15
XORQ R12, R14
ADDQ R15, DX
RORXQ $0x1c, BX, R12
XORQ R12, R14
MOVQ BX, R12
ANDQ R8, R12
ORQ R12, DI
ADDQ R14, AX
ADDQ R15, AX
ADDQ DI, AX
// ---- Rounds 8-11; oldest words are now in Y6, newest in Y5 ----
// The state registers are back in their initial roles (a=AX ... h=R11).
// Y6 is replaced; W[t-7] comes from Y4/Y5, W[t-15] from Y6/Y7, and sigma1
// starts from the high half of Y5.
VPADDQ 64(BP), Y6, Y0
VMOVDQU Y0, (SP)
VPERM2F128 $0x03, Y4, Y5, Y0
VPALIGNR $0x08, Y4, Y0, Y0
VPADDQ Y6, Y0, Y0
VPERM2F128 $0x03, Y6, Y7, Y1
VPALIGNR $0x08, Y6, Y1, Y1
VPSRLQ $0x01, Y1, Y2
VPSLLQ $0x3f, Y1, Y3
VPOR Y2, Y3, Y3
VPSRLQ $0x07, Y1, Y8
// Round with word 0(SP): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
MOVQ AX, DI
RORXQ $0x29, DX, R13
RORXQ $0x12, DX, R14
ADDQ (SP), R11
ORQ CX, DI
MOVQ R9, R15
RORXQ $0x22, AX, R12
XORQ R14, R13
XORQ R10, R15
RORXQ $0x0e, DX, R14
ANDQ DX, R15
XORQ R14, R13
RORXQ $0x27, AX, R14
ADDQ R11, R8
ANDQ BX, DI
XORQ R12, R14
RORXQ $0x1c, AX, R12
XORQ R10, R15
XORQ R12, R14
MOVQ AX, R12
ANDQ CX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R11
ADDQ R15, R8
ADDQ R15, R11
ADDQ DI, R11
// Finish sigma0, add into Y0, split Y0 into Y6 (low half duplicated) and
// Y0 (high half only), and start sigma1 on the high half of Y5.
VPSRLQ $0x08, Y1, Y2
VPSLLQ $0x38, Y1, Y1
VPOR Y2, Y1, Y1
VPXOR Y8, Y3, Y3
VPXOR Y1, Y3, Y1
VPADDQ Y1, Y0, Y0
VPERM2F128 $0x00, Y0, Y0, Y6
VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y5, Y5, Y2
VPSRLQ $0x06, Y2, Y8
// Round with word 8(SP): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
MOVQ R11, DI
RORXQ $0x29, R8, R13
RORXQ $0x12, R8, R14
ADDQ 8(SP), R10
ORQ BX, DI
MOVQ DX, R15
RORXQ $0x22, R11, R12
XORQ R14, R13
XORQ R9, R15
RORXQ $0x0e, R8, R14
XORQ R14, R13
RORXQ $0x27, R11, R14
ANDQ R8, R15
ADDQ R10, CX
ANDQ AX, DI
XORQ R12, R14
RORXQ $0x1c, R11, R12
XORQ R9, R15
XORQ R12, R14
MOVQ R11, R12
ANDQ BX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R10
ADDQ R15, CX
ADDQ R15, R10
ADDQ DI, R10
// sigma1(Y2) completes the first two new words in Y6; start sigma1 on them.
VPSRLQ $0x13, Y2, Y3
VPSLLQ $0x2d, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y2, Y3
VPSLLQ $0x03, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y6, Y6
VPSRLQ $0x06, Y6, Y8
// Round with word 16(SP): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
MOVQ R10, DI
RORXQ $0x29, CX, R13
ADDQ 16(SP), R9
RORXQ $0x12, CX, R14
ORQ AX, DI
MOVQ R8, R15
XORQ DX, R15
RORXQ $0x22, R10, R12
XORQ R14, R13
ANDQ CX, R15
RORXQ $0x0e, CX, R14
ADDQ R9, BX
ANDQ R11, DI
XORQ R14, R13
RORXQ $0x27, R10, R14
XORQ DX, R15
XORQ R12, R14
RORXQ $0x1c, R10, R12
XORQ R12, R14
MOVQ R10, R12
ANDQ AX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R9
ADDQ R15, BX
ADDQ R15, R9
ADDQ DI, R9
// sigma1(Y6) yields the last two new words; blend them into the high half
// of Y6.
VPSRLQ $0x13, Y6, Y3
VPSLLQ $0x2d, Y6, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y6, Y3
VPSLLQ $0x03, Y6, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y0, Y2
VPBLENDD $0xf0, Y2, Y6, Y6
// Round with word 24(SP): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
MOVQ R9, DI
RORXQ $0x29, BX, R13
RORXQ $0x12, BX, R14
ADDQ 24(SP), DX
ORQ R11, DI
MOVQ CX, R15
RORXQ $0x22, R9, R12
XORQ R14, R13
XORQ R8, R15
RORXQ $0x0e, BX, R14
ANDQ BX, R15
ADDQ DX, AX
ANDQ R10, DI
XORQ R14, R13
XORQ R8, R15
RORXQ $0x27, R9, R14
ADDQ R13, R15
XORQ R12, R14
ADDQ R15, AX
RORXQ $0x1c, R9, R12
XORQ R12, R14
MOVQ R9, R12
ANDQ R11, R12
ORQ R12, DI
ADDQ R14, DX
ADDQ R15, DX
ADDQ DI, DX
// ---- Rounds 12-15; oldest words are now in Y7, newest in Y6 ----
// Y7 is replaced; W[t-7] comes from Y5/Y6, W[t-15] from Y7/Y4, and sigma1
// starts from the high half of Y6.
VPADDQ 96(BP), Y7, Y0
VMOVDQU Y0, (SP)
// All four 32-byte table entries of this iteration have been read; advance
// BP by 128 bytes for the next iteration (or for loop2).
ADDQ $0x80, BP
VPERM2F128 $0x03, Y5, Y6, Y0
VPALIGNR $0x08, Y5, Y0, Y0
VPADDQ Y7, Y0, Y0
VPERM2F128 $0x03, Y7, Y4, Y1
VPALIGNR $0x08, Y7, Y1, Y1
VPSRLQ $0x01, Y1, Y2
VPSLLQ $0x3f, Y1, Y3
VPOR Y2, Y3, Y3
VPSRLQ $0x07, Y1, Y8
// Round with word 0(SP): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
MOVQ DX, DI
RORXQ $0x29, AX, R13
RORXQ $0x12, AX, R14
ADDQ (SP), R8
ORQ R10, DI
MOVQ BX, R15
RORXQ $0x22, DX, R12
XORQ R14, R13
XORQ CX, R15
RORXQ $0x0e, AX, R14
ANDQ AX, R15
XORQ R14, R13
RORXQ $0x27, DX, R14
ADDQ R8, R11
ANDQ R9, DI
XORQ R12, R14
RORXQ $0x1c, DX, R12
XORQ CX, R15
XORQ R12, R14
MOVQ DX, R12
ANDQ R10, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, R8
ADDQ R15, R11
ADDQ R15, R8
ADDQ DI, R8
// Finish sigma0, add into Y0, split Y0 into Y7 (low half duplicated) and
// Y0 (high half only), and start sigma1 on the high half of Y6.
VPSRLQ $0x08, Y1, Y2
VPSLLQ $0x38, Y1, Y1
VPOR Y2, Y1, Y1
VPXOR Y8, Y3, Y3
VPXOR Y1, Y3, Y1
VPADDQ Y1, Y0, Y0
VPERM2F128 $0x00, Y0, Y0, Y7
VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y6, Y6, Y2
VPSRLQ $0x06, Y2, Y8
// Round with word 8(SP): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
MOVQ R8, DI
RORXQ $0x29, R11, R13
RORXQ $0x12, R11, R14
ADDQ 8(SP), CX
ORQ R9, DI
MOVQ AX, R15
RORXQ $0x22, R8, R12
XORQ R14, R13
XORQ BX, R15
RORXQ $0x0e, R11, R14
XORQ R14, R13
RORXQ $0x27, R8, R14
ANDQ R11, R15
ADDQ CX, R10
ANDQ DX, DI
XORQ R12, R14
RORXQ $0x1c, R8, R12
XORQ BX, R15
XORQ R12, R14
MOVQ R8, R12
ANDQ R9, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, CX
ADDQ R15, R10
ADDQ R15, CX
ADDQ DI, CX
// sigma1(Y2) completes the first two new words in Y7; start sigma1 on them.
VPSRLQ $0x13, Y2, Y3
VPSLLQ $0x2d, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y2, Y3
VPSLLQ $0x03, Y2, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y7, Y7
VPSRLQ $0x06, Y7, Y8
// Round with word 16(SP): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
MOVQ CX, DI
RORXQ $0x29, R10, R13
ADDQ 16(SP), BX
RORXQ $0x12, R10, R14
ORQ DX, DI
MOVQ R11, R15
XORQ AX, R15
RORXQ $0x22, CX, R12
XORQ R14, R13
ANDQ R10, R15
RORXQ $0x0e, R10, R14
ADDQ BX, R9
ANDQ R8, DI
XORQ R14, R13
RORXQ $0x27, CX, R14
XORQ AX, R15
XORQ R12, R14
RORXQ $0x1c, CX, R12
XORQ R12, R14
MOVQ CX, R12
ANDQ DX, R12
ADDQ R13, R15
ORQ R12, DI
ADDQ R14, BX
ADDQ R15, R9
ADDQ R15, BX
ADDQ DI, BX
// sigma1(Y7) yields the last two new words; blend them into the high half
// of Y7.
VPSRLQ $0x13, Y7, Y3
VPSLLQ $0x2d, Y7, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPSRLQ $0x3d, Y7, Y3
VPSLLQ $0x03, Y7, Y1
VPOR Y1, Y3, Y3
VPXOR Y3, Y8, Y8
VPADDQ Y8, Y0, Y2
VPBLENDD $0xf0, Y2, Y7, Y7
// Round with word 24(SP): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
MOVQ BX, DI
RORXQ $0x29, R9, R13
RORXQ $0x12, R9, R14
ADDQ 24(SP), AX
ORQ R8, DI
MOVQ R10, R15
RORXQ $0x22, BX, R12
XORQ R14, R13
XORQ R11, R15
RORXQ $0x0e, R9, R14
ANDQ R9, R15
ADDQ AX, DX
ANDQ CX, DI
XORQ R14, R13
XORQ R11, R15
RORXQ $0x27, BX, R14
ADDQ R13, R15
XORQ R12, R14
ADDQ R15, DX
RORXQ $0x1c, BX, R12
XORQ R12, R14
MOVQ BX, R12
ANDQ R8, R12
ORQ R12, DI
ADDQ R14, AX
ADDQ R15, AX
ADDQ DI, AX
// Sixteen rounds done and the roles are back to a=AX ... h=R11; repeat until
// the counter at 32(SP) reaches zero.
SUBQ $0x01, 32(SP)
JNE loop1
// loop2: the final 16 rounds. No further schedule words are derived here, so
// only scalar rounds remain. The loop runs twice (counter at 32(SP)); each
// iteration does 8 rounds, four with Y4 + 0(BP) and four with Y5 + 32(BP),
// then advances BP by 64 bytes. Y6 and Y7 are copied into Y4 and Y5 at the
// end of the body so the second iteration consumes them.
//
// The round arithmetic is the same as in loop1: with the roles a..h rotating
// through AX, BX, CX, R8, DX, R9, R10, R11 and DI, R12..R15 as scratch,
//   R13 = ROTR(e,41) ^ ROTR(e,18) ^ ROTR(e,14)
//   R15 = (((f ^ g) & e) ^ g) + R13
//   R14 = ROTR(a,39) ^ ROTR(a,34) ^ ROTR(a,28)
//   DI  = ((a | c) & b) | (a & c)
//   h  += word from the stack;  d += h + R15;  h += R14 + R15 + DI
// The final "h += DI" of a round is issued inside the following round, except
// for the last round of each group of four, which performs it itself.
MOVQ $0x00000002, 32(SP)
loop2:
// Y0 = Y4 + table words at 0(BP); spill to 0(SP)..31(SP) for four rounds.
VPADDQ (BP), Y4, Y0
VMOVDQU Y0, (SP)
// Round with word 0(SP): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
MOVQ R9, R15
RORXQ $0x29, DX, R13
RORXQ $0x12, DX, R14
XORQ R10, R15
XORQ R14, R13
RORXQ $0x0e, DX, R14
ANDQ DX, R15
XORQ R14, R13
RORXQ $0x22, AX, R12
XORQ R10, R15
RORXQ $0x27, AX, R14
MOVQ AX, DI
XORQ R12, R14
RORXQ $0x1c, AX, R12
ADDQ (SP), R11
ORQ CX, DI
XORQ R12, R14
MOVQ AX, R12
ANDQ BX, DI
ANDQ CX, R12
ADDQ R13, R15
ADDQ R11, R8
ORQ R12, DI
ADDQ R14, R11
ADDQ R15, R8
ADDQ R15, R11
// Round with word 8(SP): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
// (The "ADDQ DI, R11" below completes the previous round.)
MOVQ DX, R15
RORXQ $0x29, R8, R13
RORXQ $0x12, R8, R14
XORQ R9, R15
XORQ R14, R13
RORXQ $0x0e, R8, R14
ANDQ R8, R15
ADDQ DI, R11
XORQ R14, R13
RORXQ $0x22, R11, R12
XORQ R9, R15
RORXQ $0x27, R11, R14
MOVQ R11, DI
XORQ R12, R14
RORXQ $0x1c, R11, R12
ADDQ 8(SP), R10
ORQ BX, DI
XORQ R12, R14
MOVQ R11, R12
ANDQ AX, DI
ANDQ BX, R12
ADDQ R13, R15
ADDQ R10, CX
ORQ R12, DI
ADDQ R14, R10
ADDQ R15, CX
ADDQ R15, R10
// Round with word 16(SP): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
MOVQ R8, R15
RORXQ $0x29, CX, R13
RORXQ $0x12, CX, R14
XORQ DX, R15
XORQ R14, R13
RORXQ $0x0e, CX, R14
ANDQ CX, R15
ADDQ DI, R10
XORQ R14, R13
RORXQ $0x22, R10, R12
XORQ DX, R15
RORXQ $0x27, R10, R14
MOVQ R10, DI
XORQ R12, R14
RORXQ $0x1c, R10, R12
ADDQ 16(SP), R9
ORQ AX, DI
XORQ R12, R14
MOVQ R10, R12
ANDQ R11, DI
ANDQ AX, R12
ADDQ R13, R15
ADDQ R9, BX
ORQ R12, DI
ADDQ R14, R9
ADDQ R15, BX
ADDQ R15, R9
// Round with word 24(SP): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
MOVQ CX, R15
RORXQ $0x29, BX, R13
RORXQ $0x12, BX, R14
XORQ R8, R15
XORQ R14, R13
RORXQ $0x0e, BX, R14
ANDQ BX, R15
ADDQ DI, R9
XORQ R14, R13
RORXQ $0x22, R9, R12
XORQ R8, R15
RORXQ $0x27, R9, R14
MOVQ R9, DI
XORQ R12, R14
RORXQ $0x1c, R9, R12
ADDQ 24(SP), DX
ORQ R11, DI
XORQ R12, R14
MOVQ R9, R12
ANDQ R10, DI
ANDQ R11, R12
ADDQ R13, R15
ADDQ DX, AX
ORQ R12, DI
ADDQ R14, DX
ADDQ R15, AX
ADDQ R15, DX
ADDQ DI, DX
// Y0 = Y5 + table words at 32(BP); spill for the next four rounds, then
// advance BP past the 64 bytes consumed by this iteration.
VPADDQ 32(BP), Y5, Y0
VMOVDQU Y0, (SP)
ADDQ $0x40, BP
// Round with word 0(SP): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
MOVQ BX, R15
RORXQ $0x29, AX, R13
RORXQ $0x12, AX, R14
XORQ CX, R15
XORQ R14, R13
RORXQ $0x0e, AX, R14
ANDQ AX, R15
XORQ R14, R13
RORXQ $0x22, DX, R12
XORQ CX, R15
RORXQ $0x27, DX, R14
MOVQ DX, DI
XORQ R12, R14
RORXQ $0x1c, DX, R12
ADDQ (SP), R8
ORQ R10, DI
XORQ R12, R14
MOVQ DX, R12
ANDQ R9, DI
ANDQ R10, R12
ADDQ R13, R15
ADDQ R8, R11
ORQ R12, DI
ADDQ R14, R8
ADDQ R15, R11
ADDQ R15, R8
// Round with word 8(SP): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
MOVQ AX, R15
RORXQ $0x29, R11, R13
RORXQ $0x12, R11, R14
XORQ BX, R15
XORQ R14, R13
RORXQ $0x0e, R11, R14
ANDQ R11, R15
ADDQ DI, R8
XORQ R14, R13
RORXQ $0x22, R8, R12
XORQ BX, R15
RORXQ $0x27, R8, R14
MOVQ R8, DI
XORQ R12, R14
RORXQ $0x1c, R8, R12
ADDQ 8(SP), CX
ORQ R9, DI
XORQ R12, R14
MOVQ R8, R12
ANDQ DX, DI
ANDQ R9, R12
ADDQ R13, R15
ADDQ CX, R10
ORQ R12, DI
ADDQ R14, CX
ADDQ R15, R10
ADDQ R15, CX
// Round with word 16(SP): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
MOVQ R11, R15
RORXQ $0x29, R10, R13
RORXQ $0x12, R10, R14
XORQ AX, R15
XORQ R14, R13
RORXQ $0x0e, R10, R14
ANDQ R10, R15
ADDQ DI, CX
XORQ R14, R13
RORXQ $0x22, CX, R12
XORQ AX, R15
RORXQ $0x27, CX, R14
MOVQ CX, DI
XORQ R12, R14
RORXQ $0x1c, CX, R12
ADDQ 16(SP), BX
ORQ DX, DI
XORQ R12, R14
MOVQ CX, R12
ANDQ R8, DI
ANDQ DX, R12
ADDQ R13, R15
ADDQ BX, R9
ORQ R12, DI
ADDQ R14, BX
ADDQ R15, R9
ADDQ R15, BX
// Round with word 24(SP): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
MOVQ R10, R15
RORXQ $0x29, R9, R13
RORXQ $0x12, R9, R14
XORQ R11, R15
XORQ R14, R13
RORXQ $0x0e, R9, R14
ANDQ R9, R15
ADDQ DI, BX
XORQ R14, R13
RORXQ $0x22, BX, R12
XORQ R11, R15
RORXQ $0x27, BX, R14
MOVQ BX, DI
XORQ R12, R14
RORXQ $0x1c, BX, R12
ADDQ 24(SP), AX
ORQ R8, DI
XORQ R12, R14
MOVQ BX, R12
ANDQ CX, DI
ANDQ R8, R12
ADDQ R13, R15
ADDQ AX, DX
ORQ R12, DI
ADDQ R14, AX
ADDQ R15, DX
ADDQ R15, AX
ADDQ DI, AX
// Queue the remaining eight schedule words (Y6, Y7) for the second pass.
VMOVDQU Y6, Y4
VMOVDQU Y7, Y5
SUBQ $0x01, 32(SP)
JNE loop2
// All rounds for this block are done. Add each stored state word at
// dig+0..dig+56 into its working register and write the sum back; the
// registers then hold the updated state for the next block.
ADDQ (SI), AX
MOVQ AX, (SI)
ADDQ 8(SI), BX
MOVQ BX, 8(SI)
ADDQ 16(SI), CX
MOVQ CX, 16(SI)
ADDQ 24(SI), R8
MOVQ R8, 24(SI)
ADDQ 32(SI), DX
MOVQ DX, 32(SI)
ADDQ 40(SI), R9
MOVQ R9, 40(SI)
ADDQ 48(SI), R10
MOVQ R10, 48(SI)
ADDQ 56(SI), R11
MOVQ R11, 56(SI)
// Restore the input pointer saved at 40(SP), step over the 128-byte block just
// processed, and continue until it reaches the end pointer stored at 48(SP).
MOVQ 40(SP), DI
ADDQ $0x80, DI
CMPQ DI, 48(SP)
JNE loop0
done_hash:
// Clear the upper halves of the YMM registers before returning.
VZEROUPPER
RET
// PSHUFFLE_BYTE_FLIP_MASK is a 32-byte, read-only, file-local VPSHUFB control
// vector. Stored little-endian, each 8-byte group lists its byte indices in
// descending order, so shuffling with it reverses the byte order of every
// 64-bit word in a YMM register.
DATA PSHUFFLE_BYTE_FLIP_MASK<>+0(SB)/8, $0x0001020304050607
DATA PSHUFFLE_BYTE_FLIP_MASK<>+8(SB)/8, $0x08090a0b0c0d0e0f
DATA PSHUFFLE_BYTE_FLIP_MASK<>+16(SB)/8, $0x1011121314151617
DATA PSHUFFLE_BYTE_FLIP_MASK<>+24(SB)/8, $0x18191a1b1c1d1e1f
GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), RODATA|NOPTR, $32
// MASK_YMM_LO is a 32-byte, read-only, file-local AND mask: the low 128 bits
// are zero and the high 128 bits are all ones. VPAND with it clears the low
// half of a YMM register and leaves the high half unchanged.
DATA MASK_YMM_LO<>+0(SB)/8, $0x0000000000000000
DATA MASK_YMM_LO<>+8(SB)/8, $0x0000000000000000
DATA MASK_YMM_LO<>+16(SB)/8, $0xffffffffffffffff
DATA MASK_YMM_LO<>+24(SB)/8, $0xffffffffffffffff
GLOBL MASK_YMM_LO<>(SB), RODATA|NOPTR, $32
The pages are generated with Golds v0.7.7-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.