// Code generated by mkasm.go. DO NOT EDIT.

#include "go_asm.h"
#include "textflag.h"

// gcExpandersAVX512 is a read-only table of 68 (0x220/8) pointer-sized
// entries. Entries 1 through 26 hold the addresses of the expandAVX512_N
// routines for N = 1, 2, 3, 4, 6, 8, 10, ..., 32, 36, ..., 64, in the order
// listed below. Entry 0 and entries 27 through 67 are zero.
// NOTE(review): what the table index represents is not visible here;
// presumably it is a size-class index. Confirm against the Go code that
// reads this table.
GLOBL ·gcExpandersAVX512(SB), RODATA, $0x220
DATA  ·gcExpandersAVX512+0x00(SB)/8, $0
DATA  ·gcExpandersAVX512+0x08(SB)/8, $expandAVX512_1<>(SB)
DATA  ·gcExpandersAVX512+0x10(SB)/8, $expandAVX512_2<>(SB)
DATA  ·gcExpandersAVX512+0x18(SB)/8, $expandAVX512_3<>(SB)
DATA  ·gcExpandersAVX512+0x20(SB)/8, $expandAVX512_4<>(SB)
DATA  ·gcExpandersAVX512+0x28(SB)/8, $expandAVX512_6<>(SB)
DATA  ·gcExpandersAVX512+0x30(SB)/8, $expandAVX512_8<>(SB)
DATA  ·gcExpandersAVX512+0x38(SB)/8, $expandAVX512_10<>(SB)
DATA  ·gcExpandersAVX512+0x40(SB)/8, $expandAVX512_12<>(SB)
DATA  ·gcExpandersAVX512+0x48(SB)/8, $expandAVX512_14<>(SB)
DATA  ·gcExpandersAVX512+0x50(SB)/8, $expandAVX512_16<>(SB)
DATA  ·gcExpandersAVX512+0x58(SB)/8, $expandAVX512_18<>(SB)
DATA  ·gcExpandersAVX512+0x60(SB)/8, $expandAVX512_20<>(SB)
DATA  ·gcExpandersAVX512+0x68(SB)/8, $expandAVX512_22<>(SB)
DATA  ·gcExpandersAVX512+0x70(SB)/8, $expandAVX512_24<>(SB)
DATA  ·gcExpandersAVX512+0x78(SB)/8, $expandAVX512_26<>(SB)
DATA  ·gcExpandersAVX512+0x80(SB)/8, $expandAVX512_28<>(SB)
DATA  ·gcExpandersAVX512+0x88(SB)/8, $expandAVX512_30<>(SB)
DATA  ·gcExpandersAVX512+0x90(SB)/8, $expandAVX512_32<>(SB)
DATA  ·gcExpandersAVX512+0x98(SB)/8, $expandAVX512_36<>(SB)
DATA  ·gcExpandersAVX512+0xa0(SB)/8, $expandAVX512_40<>(SB)
DATA  ·gcExpandersAVX512+0xa8(SB)/8, $expandAVX512_44<>(SB)
DATA  ·gcExpandersAVX512+0xb0(SB)/8, $expandAVX512_48<>(SB)
DATA  ·gcExpandersAVX512+0xb8(SB)/8, $expandAVX512_52<>(SB)
DATA  ·gcExpandersAVX512+0xc0(SB)/8, $expandAVX512_56<>(SB)
DATA  ·gcExpandersAVX512+0xc8(SB)/8, $expandAVX512_60<>(SB)
DATA  ·gcExpandersAVX512+0xd0(SB)/8, $expandAVX512_64<>(SB)
// Entries 27-67: no routine (zero).
DATA  ·gcExpandersAVX512+0xd8(SB)/8, $0
DATA  ·gcExpandersAVX512+0xe0(SB)/8, $0
DATA  ·gcExpandersAVX512+0xe8(SB)/8, $0
DATA  ·gcExpandersAVX512+0xf0(SB)/8, $0
DATA  ·gcExpandersAVX512+0xf8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x100(SB)/8, $0
DATA  ·gcExpandersAVX512+0x108(SB)/8, $0
DATA  ·gcExpandersAVX512+0x110(SB)/8, $0
DATA  ·gcExpandersAVX512+0x118(SB)/8, $0
DATA  ·gcExpandersAVX512+0x120(SB)/8, $0
DATA  ·gcExpandersAVX512+0x128(SB)/8, $0
DATA  ·gcExpandersAVX512+0x130(SB)/8, $0
DATA  ·gcExpandersAVX512+0x138(SB)/8, $0
DATA  ·gcExpandersAVX512+0x140(SB)/8, $0
DATA  ·gcExpandersAVX512+0x148(SB)/8, $0
DATA  ·gcExpandersAVX512+0x150(SB)/8, $0
DATA  ·gcExpandersAVX512+0x158(SB)/8, $0
DATA  ·gcExpandersAVX512+0x160(SB)/8, $0
DATA  ·gcExpandersAVX512+0x168(SB)/8, $0
DATA  ·gcExpandersAVX512+0x170(SB)/8, $0
DATA  ·gcExpandersAVX512+0x178(SB)/8, $0
DATA  ·gcExpandersAVX512+0x180(SB)/8, $0
DATA  ·gcExpandersAVX512+0x188(SB)/8, $0
DATA  ·gcExpandersAVX512+0x190(SB)/8, $0
DATA  ·gcExpandersAVX512+0x198(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1a0(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1a8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1b0(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1b8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1c0(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1c8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1d0(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1d8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1e0(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1e8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1f0(SB)/8, $0
DATA  ·gcExpandersAVX512+0x1f8(SB)/8, $0
DATA  ·gcExpandersAVX512+0x200(SB)/8, $0
DATA  ·gcExpandersAVX512+0x208(SB)/8, $0
DATA  ·gcExpandersAVX512+0x210(SB)/8, $0
DATA  ·gcExpandersAVX512+0x218(SB)/8, $0

// expandAVX512_1 is the identity expander: it copies 128 bytes starting at
// the address in AX into the result registers unchanged.
//   In:  AX = source address
//   Out: Z1 = bytes 0-63, Z2 = bytes 64-127
// Frameless (NOSPLIT, $0-0); touches no other registers.
TEXT expandAVX512_1<>(SB), NOSPLIT, $0-0
	VMOVDQU64 64(AX), Z2 // upper half of the result
	VMOVDQU64 (AX), Z1   // lower half of the result
	RET

// expandAVX512_2_inShuf0 is the VPERMB index vector for the first half of
// expandAVX512_2. Each group of eight input bytes (0x00-0x07, 0x08-0x0f,
// 0x10-0x17, 0x18-0x1f) is selected into two consecutive qwords.
GLOBL expandAVX512_2_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA  expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA  expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
DATA  expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
DATA  expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918

// expandAVX512_2_mat0 holds eight 8x8 bit matrices (one per qword, one row
// per byte) used as the matrix operand of VGF2P8AFFINEQB in expandAVX512_2.
// Every row has a single bit set, so each output bit copies one input bit.
// Even qwords use the rows 0x01,0x02,0x04,0x08 twice each; odd qwords use
// 0x10,0x20,0x40,0x80 twice each.
GLOBL expandAVX512_2_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808
DATA  expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080
DATA  expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
DATA  expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
DATA  expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
DATA  expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
DATA  expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
DATA  expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080

// expandAVX512_2_inShuf1 is the VPERMB index vector for the second half of
// expandAVX512_2. It has the same layout as inShuf0 but selects input bytes
// 0x20-0x3f, each group of eight into two consecutive qwords.
GLOBL expandAVX512_2_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120
DATA  expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
DATA  expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
DATA  expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
DATA  expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
DATA  expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
DATA  expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
DATA  expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938

// expandAVX512_2_outShufLo is the final VPERMB index vector of
// expandAVX512_2. Within every 16-byte group it interleaves the bytes of the
// two adjacent qwords (0x00,0x08,0x01,0x09,...).
// The symbol is file-local (<>), matching the other expandAVX512_2 tables;
// it is only referenced by expandAVX512_2 below.
GLOBL expandAVX512_2_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_2_outShufLo<>+0x00(SB)/8, $0x0b030a0209010800
DATA  expandAVX512_2_outShufLo<>+0x08(SB)/8, $0x0f070e060d050c04
DATA  expandAVX512_2_outShufLo<>+0x10(SB)/8, $0x1b131a1219111810
DATA  expandAVX512_2_outShufLo<>+0x18(SB)/8, $0x1f171e161d151c14
DATA  expandAVX512_2_outShufLo<>+0x20(SB)/8, $0x2b232a2229212820
DATA  expandAVX512_2_outShufLo<>+0x28(SB)/8, $0x2f272e262d252c24
DATA  expandAVX512_2_outShufLo<>+0x30(SB)/8, $0x3b333a3239313830
DATA  expandAVX512_2_outShufLo<>+0x38(SB)/8, $0x3f373e363d353c34

// expandAVX512_2 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// Z1 is derived from input bytes 0x00-0x1f and Z2 from input bytes 0x20-0x3f.
// Each half goes through three steps: VPERMB gathers input bytes (inShufN),
// VGF2P8AFFINEQB applies the per-qword bit matrices (mat0), and a final
// VPERMB puts the bytes in output order (outShufLo).
// With these tables every input bit ends up repeated twice in the output.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4
TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_2_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_2_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_2_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_2_outShufLo<>(SB), Z3
	VMOVDQU64 (AX), Z4 // input
	VPERMB Z4, Z0, Z0 // gather bytes for the low half
	VGF2P8AFFINEQB $0, Z1, Z0, Z0
	VPERMB Z4, Z2, Z2 // gather bytes for the high half
	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // last use of mat0 in Z1
	VPERMB Z0, Z3, Z1 // Z1 now becomes the low result
	VPERMB Z2, Z3, Z2
	RET

// expandAVX512_3_inShuf0 is a VPERMB index vector for expandAVX512_3.
// Qwords 0-2 select input bytes 0x00-0x07 and qwords 3-5 select 0x08-0x0f.
// Qwords 6-7 are all 0xff; the matching matrices in mat0 are zero.
GLOBL expandAVX512_3_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA  expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA  expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA  expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_3_mat0 holds the 8x8 bit matrices (one per qword, one row per
// byte) used by every VGF2P8AFFINEQB in expandAVX512_3. Qwords 0-2 and 3-5
// hold the same three matrices, whose rows each have a single bit set.
// Qwords 6-7 are zero matrices, so those lanes produce zero bytes.
GLOBL expandAVX512_3_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404
DATA  expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
DATA  expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
DATA  expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404
DATA  expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
DATA  expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
DATA  expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_3_inShuf1 is a VPERMB index vector for expandAVX512_3.
// Qwords 0-2 select input bytes 0x10-0x17 and qwords 3-5 select 0x18-0x1f.
// Qwords 6-7 are all 0xff; the matching matrices in mat0 are zero.
GLOBL expandAVX512_3_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110
DATA  expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
DATA  expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
DATA  expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_3_inShuf2 is a VPERMB index vector for expandAVX512_3.
// Qwords 0-2 select input bytes 0x20-0x27. Qwords 3-4 select 0x28-0x2a in
// their low three bytes and qword 5 selects 0x28-0x29 in its low two bytes.
// All remaining index bytes are 0xff.
// NOTE(review): the 0xff positions are presumably lanes whose result is
// never picked up by the output shuffle; confirm against mkasm.go.
GLOBL expandAVX512_3_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120
DATA  expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
DATA  expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
DATA  expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928
DATA  expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
DATA  expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928
DATA  expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_3_outShufLo is the VPERMI2B index vector that builds the low
// 64 result bytes of expandAVX512_3. Indices 0x00-0x3f address the first
// source register and 0x40-0x7f the second.
// The symbol is file-local (<>), matching the other expandAVX512_3 tables;
// it is only referenced by expandAVX512_3 below.
GLOBL expandAVX512_3_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_3_outShufLo<>+0x00(SB)/8, $0x0a02110901100800
DATA  expandAVX512_3_outShufLo<>+0x08(SB)/8, $0x05140c04130b0312
DATA  expandAVX512_3_outShufLo<>+0x10(SB)/8, $0x170f07160e06150d
DATA  expandAVX512_3_outShufLo<>+0x18(SB)/8, $0x221a292119282018
DATA  expandAVX512_3_outShufLo<>+0x20(SB)/8, $0x1d2c241c2b231b2a
DATA  expandAVX512_3_outShufLo<>+0x28(SB)/8, $0x2f271f2e261e2d25
DATA  expandAVX512_3_outShufLo<>+0x30(SB)/8, $0x4a42514941504840
DATA  expandAVX512_3_outShufLo<>+0x38(SB)/8, $0x45544c44534b4352

// expandAVX512_3_outShufHi is the VPERMI2B index vector that builds the high
// 64 result bytes of expandAVX512_3 (same index encoding as outShufLo).
// File-local (<>) for the same reason as outShufLo.
GLOBL expandAVX512_3_outShufHi<>(SB), RODATA, $0x40
DATA  expandAVX512_3_outShufHi<>+0x00(SB)/8, $0x170f07160e06150d
DATA  expandAVX512_3_outShufHi<>+0x08(SB)/8, $0x221a292119282018
DATA  expandAVX512_3_outShufHi<>+0x10(SB)/8, $0x1d2c241c2b231b2a
DATA  expandAVX512_3_outShufHi<>+0x18(SB)/8, $0x2f271f2e261e2d25
DATA  expandAVX512_3_outShufHi<>+0x20(SB)/8, $0x4a42514941504840
DATA  expandAVX512_3_outShufHi<>+0x28(SB)/8, $0x45544c44534b4352
DATA  expandAVX512_3_outShufHi<>+0x30(SB)/8, $0x574f47564e46554d
DATA  expandAVX512_3_outShufHi<>+0x38(SB)/8, $0x625a696159686058

// expandAVX512_3 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// The index tables reference input bytes 0x00-0x2a only. Three intermediate
// vectors are built with VPERMB (inShuf0..2) followed by VGF2P8AFFINEQB
// (mat0). VPERMI2B then merges the first two into Z1 and the last two into Z2.
// NOTE(review): by construction of the tables this appears to repeat every
// input bit three times; confirm against mkasm.go.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4, Z5, Z6
TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_3_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_3_mat0<>(SB), Z3
	VMOVDQU64 expandAVX512_3_inShuf1<>(SB), Z4
	VMOVDQU64 expandAVX512_3_inShuf2<>(SB), Z5
	VMOVDQU64 expandAVX512_3_outShufLo<>(SB), Z1
	VMOVDQU64 expandAVX512_3_outShufHi<>(SB), Z2
	VMOVDQU64 (AX), Z6 // input
	VPERMB Z6, Z0, Z0
	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // intermediate 0
	VPERMB Z6, Z4, Z4
	VGF2P8AFFINEQB $0, Z3, Z4, Z4 // intermediate 1
	VPERMB Z6, Z5, Z5
	VGF2P8AFFINEQB $0, Z3, Z5, Z3 // intermediate 2; overwrites mat0 after its last use
	VPERMI2B Z4, Z0, Z1 // indices in Z1 pick from Z0 (0x00-0x3f) and Z4 (0x40-0x7f)
	VPERMI2B Z3, Z4, Z2 // indices in Z2 pick from Z4 (0x00-0x3f) and Z3 (0x40-0x7f)
	RET

// expandAVX512_4_inShuf0 is the VPERMB index vector for the first half of
// expandAVX512_4. Qwords 0-3 select input bytes 0x00-0x07 and qwords 4-7
// select input bytes 0x08-0x0f.
GLOBL expandAVX512_4_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA  expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA  expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA  expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
DATA  expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908

// expandAVX512_4_mat0 holds eight 8x8 bit matrices (one per qword, one row
// per byte) used as the matrix operand of VGF2P8AFFINEQB in expandAVX512_4.
// Every row has a single bit set and each matrix uses two distinct row
// values four times each. Qwords 4-7 repeat qwords 0-3.
GLOBL expandAVX512_4_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202
DATA  expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
DATA  expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
DATA  expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
DATA  expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202
DATA  expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
DATA  expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
DATA  expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080

// expandAVX512_4_inShuf1 is the VPERMB index vector for the second half of
// expandAVX512_4. Qwords 0-3 select input bytes 0x10-0x17 and qwords 4-7
// select input bytes 0x18-0x1f.
GLOBL expandAVX512_4_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110
DATA  expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
DATA  expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
DATA  expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
DATA  expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918

// expandAVX512_4_outShufLo is the final VPERMB index vector of
// expandAVX512_4. Within every 32-byte group it interleaves the bytes of the
// four qwords (0x00,0x08,0x10,0x18,0x01,0x09,...).
// The symbol is file-local (<>), matching the other expandAVX512_4 tables;
// it is only referenced by expandAVX512_4 below.
GLOBL expandAVX512_4_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_4_outShufLo<>+0x00(SB)/8, $0x1911090118100800
DATA  expandAVX512_4_outShufLo<>+0x08(SB)/8, $0x1b130b031a120a02
DATA  expandAVX512_4_outShufLo<>+0x10(SB)/8, $0x1d150d051c140c04
DATA  expandAVX512_4_outShufLo<>+0x18(SB)/8, $0x1f170f071e160e06
DATA  expandAVX512_4_outShufLo<>+0x20(SB)/8, $0x3931292138302820
DATA  expandAVX512_4_outShufLo<>+0x28(SB)/8, $0x3b332b233a322a22
DATA  expandAVX512_4_outShufLo<>+0x30(SB)/8, $0x3d352d253c342c24
DATA  expandAVX512_4_outShufLo<>+0x38(SB)/8, $0x3f372f273e362e26

// expandAVX512_4 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// The index tables reference input bytes 0x00-0x1f only: Z1 is derived from
// bytes 0x00-0x0f and Z2 from bytes 0x10-0x1f.
// Each half goes through three steps: VPERMB gathers input bytes (inShufN),
// VGF2P8AFFINEQB applies the per-qword bit matrices (mat0), and a final
// VPERMB puts the bytes in output order (outShufLo).
// With these tables every input bit ends up repeated four times.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4
TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_4_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_4_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_4_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_4_outShufLo<>(SB), Z3
	VMOVDQU64 (AX), Z4 // input
	VPERMB Z4, Z0, Z0 // gather bytes for the low half
	VGF2P8AFFINEQB $0, Z1, Z0, Z0
	VPERMB Z4, Z2, Z2 // gather bytes for the high half
	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // last use of mat0 in Z1
	VPERMB Z0, Z3, Z1 // Z1 now becomes the low result
	VPERMB Z2, Z3, Z2
	RET

// expandAVX512_6_inShuf0 is a VPERMB index vector for expandAVX512_6.
// Qwords 0-5 all select input bytes 0x00-0x07. Qwords 6-7 are all 0xff; the
// matching matrices in mat0 are zero.
GLOBL expandAVX512_6_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA  expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA  expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA  expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
DATA  expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
DATA  expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
DATA  expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_6_mat0 holds the 8x8 bit matrices (one per qword, one row per
// byte) used by every VGF2P8AFFINEQB in expandAVX512_6. Qwords 0-5 are six
// matrices whose rows each have a single bit set. Qwords 6-7 are zero
// matrices, so those lanes produce zero bytes.
GLOBL expandAVX512_6_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202
DATA  expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
DATA  expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
DATA  expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
DATA  expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
DATA  expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
DATA  expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_6_inShuf1 is a VPERMB index vector for expandAVX512_6.
// Qwords 0-5 all select input bytes 0x08-0x0f. Qwords 6-7 are all 0xff; the
// matching matrices in mat0 are zero.
GLOBL expandAVX512_6_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_6_inShuf2 is a VPERMB index vector for expandAVX512_6.
// Qwords 0-1 select input bytes 0x10-0x15 in their low six bytes and qwords
// 2-5 select 0x10-0x14 in their low five bytes. All remaining index bytes
// are 0xff.
// NOTE(review): the 0xff positions are presumably lanes whose result is
// never picked up by the output shuffle; confirm against mkasm.go.
GLOBL expandAVX512_6_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110
DATA  expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
DATA  expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110
DATA  expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
DATA  expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
DATA  expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
DATA  expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_6_outShufLo is the VPERMI2B index vector that builds the low
// 64 result bytes of expandAVX512_6. Indices 0x00-0x3f address the first
// source register and 0x40-0x7f the second.
// The symbol is file-local (<>), matching the other expandAVX512_6 tables;
// it is only referenced by expandAVX512_6 below.
GLOBL expandAVX512_6_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_6_outShufLo<>+0x00(SB)/8, $0x0901282018100800
DATA  expandAVX512_6_outShufLo<>+0x08(SB)/8, $0x1a120a0229211911
DATA  expandAVX512_6_outShufLo<>+0x10(SB)/8, $0x2b231b130b032a22
DATA  expandAVX512_6_outShufLo<>+0x18(SB)/8, $0x0d052c241c140c04
DATA  expandAVX512_6_outShufLo<>+0x20(SB)/8, $0x1e160e062d251d15
DATA  expandAVX512_6_outShufLo<>+0x28(SB)/8, $0x2f271f170f072e26
DATA  expandAVX512_6_outShufLo<>+0x30(SB)/8, $0x4941686058504840
DATA  expandAVX512_6_outShufLo<>+0x38(SB)/8, $0x5a524a4269615951

// expandAVX512_6_outShufHi is the VPERMI2B index vector that builds the high
// 64 result bytes of expandAVX512_6 (same index encoding as outShufLo).
// File-local (<>) for the same reason as outShufLo.
GLOBL expandAVX512_6_outShufHi<>(SB), RODATA, $0x40
DATA  expandAVX512_6_outShufHi<>+0x00(SB)/8, $0x2b231b130b032a22
DATA  expandAVX512_6_outShufHi<>+0x08(SB)/8, $0x0d052c241c140c04
DATA  expandAVX512_6_outShufHi<>+0x10(SB)/8, $0x1e160e062d251d15
DATA  expandAVX512_6_outShufHi<>+0x18(SB)/8, $0x2f271f170f072e26
DATA  expandAVX512_6_outShufHi<>+0x20(SB)/8, $0x4941686058504840
DATA  expandAVX512_6_outShufHi<>+0x28(SB)/8, $0x5a524a4269615951
DATA  expandAVX512_6_outShufHi<>+0x30(SB)/8, $0x6b635b534b436a62
DATA  expandAVX512_6_outShufHi<>+0x38(SB)/8, $0x4d456c645c544c44

// expandAVX512_6 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// The index tables reference input bytes 0x00-0x15 only. Three intermediate
// vectors are built with VPERMB (inShuf0..2) followed by VGF2P8AFFINEQB
// (mat0). VPERMI2B then merges the first two into Z1 and the last two into Z2.
// NOTE(review): by construction of the tables this appears to repeat every
// input bit six times; confirm against mkasm.go.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4, Z5, Z6
TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_6_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_6_mat0<>(SB), Z3
	VMOVDQU64 expandAVX512_6_inShuf1<>(SB), Z4
	VMOVDQU64 expandAVX512_6_inShuf2<>(SB), Z5
	VMOVDQU64 expandAVX512_6_outShufLo<>(SB), Z1
	VMOVDQU64 expandAVX512_6_outShufHi<>(SB), Z2
	VMOVDQU64 (AX), Z6 // input
	VPERMB Z6, Z0, Z0
	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // intermediate 0
	VPERMB Z6, Z4, Z4
	VGF2P8AFFINEQB $0, Z3, Z4, Z4 // intermediate 1
	VPERMB Z6, Z5, Z5
	VGF2P8AFFINEQB $0, Z3, Z5, Z3 // intermediate 2; overwrites mat0 after its last use
	VPERMI2B Z4, Z0, Z1 // indices in Z1 pick from Z0 (0x00-0x3f) and Z4 (0x40-0x7f)
	VPERMI2B Z3, Z4, Z2 // indices in Z2 pick from Z4 (0x00-0x3f) and Z3 (0x40-0x7f)
	RET

// expandAVX512_8_inShuf0 is the VPERMB index vector for the first half of
// expandAVX512_8. All eight qwords select input bytes 0x00-0x07.
GLOBL expandAVX512_8_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
DATA  expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100

// expandAVX512_8_mat0 holds eight 8x8 bit matrices (one per qword, one row
// per byte) used as the matrix operand of VGF2P8AFFINEQB in expandAVX512_8.
// In qword k every row equals 1<<k, so every output bit of a byte in that
// lane is a copy of input bit k of the same byte.
GLOBL expandAVX512_8_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_8_inShuf1 is the VPERMB index vector for the second half of
// expandAVX512_8. All eight qwords select input bytes 0x08-0x0f.
GLOBL expandAVX512_8_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908

// expandAVX512_8_outShufLo is the final VPERMB index vector of
// expandAVX512_8. It transposes the vector viewed as an 8x8 byte grid
// (0x00,0x08,0x10,...,0x38,0x01,0x09,...), so byte j of qword k moves to
// byte k of qword j.
// The symbol is file-local (<>), matching the other expandAVX512_8 tables;
// it is only referenced by expandAVX512_8 below.
GLOBL expandAVX512_8_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_8_outShufLo<>+0x00(SB)/8, $0x3830282018100800
DATA  expandAVX512_8_outShufLo<>+0x08(SB)/8, $0x3931292119110901
DATA  expandAVX512_8_outShufLo<>+0x10(SB)/8, $0x3a322a221a120a02
DATA  expandAVX512_8_outShufLo<>+0x18(SB)/8, $0x3b332b231b130b03
DATA  expandAVX512_8_outShufLo<>+0x20(SB)/8, $0x3c342c241c140c04
DATA  expandAVX512_8_outShufLo<>+0x28(SB)/8, $0x3d352d251d150d05
DATA  expandAVX512_8_outShufLo<>+0x30(SB)/8, $0x3e362e261e160e06
DATA  expandAVX512_8_outShufLo<>+0x38(SB)/8, $0x3f372f271f170f07

// expandAVX512_8 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// The index tables reference input bytes 0x00-0x0f only: Z1 is derived from
// bytes 0x00-0x07 and Z2 from bytes 0x08-0x0f.
// Each half goes through three steps: VPERMB copies the eight input bytes
// into every qword, VGF2P8AFFINEQB turns each byte of qword k into 0x00 or
// 0xff according to its bit k, and the final VPERMB transposes the bytes.
// With these tables every input bit becomes one full output byte, i.e. it
// is repeated eight times.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4
TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_8_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_8_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_8_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_8_outShufLo<>(SB), Z3
	VMOVDQU64 (AX), Z4 // input
	VPERMB Z4, Z0, Z0 // gather bytes for the low half
	VGF2P8AFFINEQB $0, Z1, Z0, Z0
	VPERMB Z4, Z2, Z2 // gather bytes for the high half
	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // last use of mat0 in Z1
	VPERMB Z0, Z3, Z1 // Z1 now becomes the low result
	VPERMB Z2, Z3, Z2
	RET

// expandAVX512_10_inShuf0 is a VPERMB index vector for expandAVX512_10.
// Qwords 0-3 select input bytes 0x00-0x06 in their low seven bytes and
// qwords 4-7 select 0x00-0x05 in their low six bytes. The remaining index
// bytes are 0xff.
// NOTE(review): the 0xff positions are presumably lanes whose result is
// never picked up by the output shuffle; confirm against mkasm.go.
GLOBL expandAVX512_10_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100
DATA  expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
DATA  expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
DATA  expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
DATA  expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100
DATA  expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
DATA  expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
DATA  expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100

// expandAVX512_10_mat0 holds eight 8x8 bit matrices (one per qword, one row
// per byte), used as the memory matrix operand of the first VGF2P8AFFINEQB
// in expandAVX512_10. Every row has a single bit set (0x01 through 0x40).
GLOBL expandAVX512_10_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
DATA  expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
DATA  expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
DATA  expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
DATA  expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
DATA  expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
DATA  expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040

// expandAVX512_10_inShuf1 is a VPERMB index vector for expandAVX512_10.
// Qwords 0-1 select input bytes 0x00-0x05, qwords 2-5 select 0x06-0x0c and
// qwords 6-7 select 0x06-0x0b, each in the low bytes of the qword. The
// remaining index bytes are 0xff.
GLOBL expandAVX512_10_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100
DATA  expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
DATA  expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706
DATA  expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
DATA  expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
DATA  expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
DATA  expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706
DATA  expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706

// expandAVX512_10_mat1 holds eight 8x8 bit matrices (one per qword, one row
// per byte), used as the memory matrix operand of the second VGF2P8AFFINEQB
// in expandAVX512_10. Every row has a single bit set.
GLOBL expandAVX512_10_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080
DATA  expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
DATA  expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808
DATA  expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
DATA  expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
DATA  expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
DATA  expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080
DATA  expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_10_inShuf2 is a VPERMB index vector for expandAVX512_10.
// Qwords 0-3 select input bytes 0x07-0x0c in their low six bytes. All other
// index bytes are 0xff; qwords 4-7 pair with zero matrices in mat2.
GLOBL expandAVX512_10_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807
DATA  expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
DATA  expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
DATA  expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
DATA  expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_10_mat2 holds eight 8x8 bit matrices (one per qword, one row
// per byte), used as the memory matrix operand of the third VGF2P8AFFINEQB
// in expandAVX512_10. Qwords 0-3 equal qwords 0-3 of mat0. Qwords 4-7 are
// zero matrices, so those lanes produce zero bytes.
GLOBL expandAVX512_10_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
DATA  expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
DATA  expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
DATA  expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_10_outShufLo is the VPERMI2B index vector that builds the low
// 64 result bytes of expandAVX512_10. Indices 0x00-0x3f address the first
// source register and 0x40-0x7f the second.
// The symbol is file-local (<>), matching the other expandAVX512_10 tables;
// it is only referenced by expandAVX512_10 below.
GLOBL expandAVX512_10_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_10_outShufLo<>+0x00(SB)/8, $0x3830282018100800
DATA  expandAVX512_10_outShufLo<>+0x08(SB)/8, $0x2921191109014840
DATA  expandAVX512_10_outShufLo<>+0x10(SB)/8, $0x1a120a0249413931
DATA  expandAVX512_10_outShufLo<>+0x18(SB)/8, $0x0b034a423a322a22
DATA  expandAVX512_10_outShufLo<>+0x20(SB)/8, $0x4b433b332b231b13
DATA  expandAVX512_10_outShufLo<>+0x28(SB)/8, $0x3c342c241c140c04
DATA  expandAVX512_10_outShufLo<>+0x30(SB)/8, $0x2d251d150d054c44
DATA  expandAVX512_10_outShufLo<>+0x38(SB)/8, $0x1e160e064d453d35

// expandAVX512_10_outShufHi is the VPERMI2B index vector that builds the
// high 64 result bytes of expandAVX512_10 (same index encoding as outShufLo).
// File-local (<>) for the same reason as outShufLo.
GLOBL expandAVX512_10_outShufHi<>(SB), RODATA, $0x40
DATA  expandAVX512_10_outShufHi<>+0x00(SB)/8, $0x4840383028201810
DATA  expandAVX512_10_outShufHi<>+0x08(SB)/8, $0x3931292119115850
DATA  expandAVX512_10_outShufHi<>+0x10(SB)/8, $0x2a221a1259514941
DATA  expandAVX512_10_outShufHi<>+0x18(SB)/8, $0x1b135a524a423a32
DATA  expandAVX512_10_outShufHi<>+0x20(SB)/8, $0x5b534b433b332b23
DATA  expandAVX512_10_outShufHi<>+0x28(SB)/8, $0x4c443c342c241c14
DATA  expandAVX512_10_outShufHi<>+0x30(SB)/8, $0x3d352d251d155c54
DATA  expandAVX512_10_outShufHi<>+0x38(SB)/8, $0x2e261e165d554d45

// expandAVX512_10 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// The index tables reference input bytes 0x00-0x0c only. Three intermediate
// vectors are built with VPERMB (inShuf0..2) followed by VGF2P8AFFINEQB,
// each with its own matrix table (mat0..2) read directly from memory.
// VPERMI2B then merges the first two into Z1 and the last two into Z2.
// NOTE(review): by construction of the tables this appears to repeat every
// input bit ten times; confirm against mkasm.go.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4, Z5
TEXT expandAVX512_10<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_10_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_10_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_10_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_10_outShufLo<>(SB), Z1
	VMOVDQU64 expandAVX512_10_outShufHi<>(SB), Z2
	VMOVDQU64 (AX), Z5 // input
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_10_mat0<>(SB), Z0, Z0 // intermediate 0
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_10_mat1<>(SB), Z3, Z3 // intermediate 1
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_10_mat2<>(SB), Z4, Z4 // intermediate 2
	VPERMI2B Z3, Z0, Z1 // indices in Z1 pick from Z0 (0x00-0x3f) and Z3 (0x40-0x7f)
	VPERMI2B Z4, Z3, Z2 // indices in Z2 pick from Z3 (0x00-0x3f) and Z4 (0x40-0x7f)
	RET

// expandAVX512_12_inShuf0 is a VPERMB index vector for expandAVX512_12.
// Qwords 0-3 select input bytes 0x00-0x05 in their low six bytes and qwords
// 4-7 select 0x00-0x04 in their low five bytes. The remaining index bytes
// are 0xff.
// NOTE(review): the 0xff positions are presumably lanes whose result is
// never picked up by the output shuffle; confirm against mkasm.go.
GLOBL expandAVX512_12_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100
DATA  expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
DATA  expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
DATA  expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
DATA  expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100

// expandAVX512_12_mat0 holds eight 8x8 bit matrices (one per qword, one row
// per byte), used as the memory matrix operand of the first VGF2P8AFFINEQB
// in expandAVX512_12. Every row has a single bit set (0x01 through 0x20).
GLOBL expandAVX512_12_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020

// expandAVX512_12_inShuf1 is a VPERMB index vector for expandAVX512_12.
// Qwords 0-3 select input bytes 0x00-0x04 in their low five bytes and qwords
// 4-7 select 0x05-0x0a in their low six bytes. The remaining index bytes
// are 0xff.
GLOBL expandAVX512_12_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
DATA  expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605
DATA  expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
DATA  expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
DATA  expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605

// expandAVX512_12_mat1 holds eight 8x8 bit matrices (one per qword, one row
// per byte), used as the memory matrix operand of the second VGF2P8AFFINEQB
// in expandAVX512_12. Every row has a single bit set. Qwords 4-7 equal
// qwords 4-7 of mat0.
GLOBL expandAVX512_12_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020

// expandAVX512_12_inShuf2 is a VPERMB index vector for expandAVX512_12.
// Qwords 0-3 select input bytes 0x05-0x09 and qwords 4-7 select 0x06-0x0a,
// each in the low five bytes of the qword. The remaining index bytes are
// 0xff.
GLOBL expandAVX512_12_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605
DATA  expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
DATA  expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
DATA  expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
DATA  expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706
DATA  expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
DATA  expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
DATA  expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706

// expandAVX512_12_mat2 holds eight 8x8 bit matrices (one per qword, one row
// per byte), used as the memory matrix operand of the third VGF2P8AFFINEQB
// in expandAVX512_12. Every row has a single bit set. Qwords 0-3 equal
// qwords 0-3 of mat1 and qwords 4-7 equal qwords 0-3 of mat0.
GLOBL expandAVX512_12_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
DATA  expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
DATA  expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404

// expandAVX512_12_outShufLo is the VPERMI2B index vector that builds the low
// 64 result bytes of expandAVX512_12. Indices 0x00-0x3f address the first
// source register and 0x40-0x7f the second.
// The symbol is file-local (<>), matching the other expandAVX512_12 tables;
// it is only referenced by expandAVX512_12 below.
GLOBL expandAVX512_12_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_12_outShufLo<>+0x00(SB)/8, $0x3830282018100800
DATA  expandAVX512_12_outShufLo<>+0x08(SB)/8, $0x1911090158504840
DATA  expandAVX512_12_outShufLo<>+0x10(SB)/8, $0x5951494139312921
DATA  expandAVX512_12_outShufLo<>+0x18(SB)/8, $0x3a322a221a120a02
DATA  expandAVX512_12_outShufLo<>+0x20(SB)/8, $0x1b130b035a524a42
DATA  expandAVX512_12_outShufLo<>+0x28(SB)/8, $0x5b534b433b332b23
DATA  expandAVX512_12_outShufLo<>+0x30(SB)/8, $0x3c342c241c140c04
DATA  expandAVX512_12_outShufLo<>+0x38(SB)/8, $0x1d150d055c544c44

// expandAVX512_12_outShufHi is the VPERMI2B index vector that builds the
// high 64 result bytes of expandAVX512_12 (same index encoding as outShufLo).
// File-local (<>) for the same reason as outShufLo.
GLOBL expandAVX512_12_outShufHi<>(SB), RODATA, $0x40
DATA  expandAVX512_12_outShufHi<>+0x00(SB)/8, $0x5850484038302820
DATA  expandAVX512_12_outShufHi<>+0x08(SB)/8, $0x3931292178706860
DATA  expandAVX512_12_outShufHi<>+0x10(SB)/8, $0x7971696159514941
DATA  expandAVX512_12_outShufHi<>+0x18(SB)/8, $0x5a524a423a322a22
DATA  expandAVX512_12_outShufHi<>+0x20(SB)/8, $0x3b332b237a726a62
DATA  expandAVX512_12_outShufHi<>+0x28(SB)/8, $0x7b736b635b534b43
DATA  expandAVX512_12_outShufHi<>+0x30(SB)/8, $0x5c544c443c342c24
DATA  expandAVX512_12_outShufHi<>+0x38(SB)/8, $0x3d352d257c746c64

// expandAVX512_12 loads 64 bytes from (AX) and produces 128 bytes in Z1:Z2.
// The index tables reference input bytes 0x00-0x0a only. Three intermediate
// vectors are built with VPERMB (inShuf0..2) followed by VGF2P8AFFINEQB,
// each with its own matrix table (mat0..2) read directly from memory.
// VPERMI2B then merges the first two into Z1 and the last two into Z2.
// NOTE(review): by construction of the tables this appears to repeat every
// input bit twelve times; confirm against mkasm.go.
//   In:  AX = source address
//   Out: Z1 = low 64 result bytes, Z2 = high 64 result bytes
//   Clobbers: Z0, Z3, Z4, Z5
TEXT expandAVX512_12<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_12_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_12_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_12_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_12_outShufLo<>(SB), Z1
	VMOVDQU64 expandAVX512_12_outShufHi<>(SB), Z2
	VMOVDQU64 (AX), Z5 // input
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_12_mat0<>(SB), Z0, Z0 // intermediate 0
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_12_mat1<>(SB), Z3, Z3 // intermediate 1
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_12_mat2<>(SB), Z4, Z4 // intermediate 2
	VPERMI2B Z3, Z0, Z1 // indices in Z1 pick from Z0 (0x00-0x3f) and Z3 (0x40-0x7f)
	VPERMI2B Z4, Z3, Z2 // indices in Z2 pick from Z3 (0x00-0x3f) and Z4 (0x40-0x7f)
	RET

// expandAVX512_14_inShuf0: in every qword the low five index bytes are
// 0x00-0x04 and the remaining three are 0xff.
// NOTE(review): the code that loads this table is outside the visible
// chunk; by analogy with the smaller expanders it is presumably a VPERMB
// index vector for expandAVX512_14.
GLOBL expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
DATA  expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100

// expandAVX512_14_mat0: eight qwords whose bytes each have a single bit set
// (0x01 through 0x10).
// NOTE(review): the code that uses this table is outside the visible chunk;
// presumably these are 8x8 bit matrices for VGF2P8AFFINEQB in
// expandAVX512_14, paired with inShuf0.
GLOBL expandAVX512_14_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
DATA  expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
DATA  expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
DATA  expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
DATA  expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
DATA  expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010

// expandAVX512_14_inShuf1: qwords 0-5 hold indices 0x00-0x03 in their low
// four bytes and qwords 6-7 hold 0x04-0x08 in their low five bytes. The
// remaining index bytes are 0xff.
// NOTE(review): the code that loads this table is outside the visible
// chunk; presumably a VPERMB index vector for expandAVX512_14.
GLOBL expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
DATA  expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
DATA  expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
DATA  expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
DATA  expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
DATA  expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
DATA  expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
DATA  expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504

// expandAVX512_14_mat1: eight qwords whose bytes each have a single bit set
// (0x10 through 0x80). Qwords 6-7 repeat qwords 0-1.
// NOTE(review): the code that uses this table is outside the visible chunk;
// presumably these are 8x8 bit matrices for VGF2P8AFFINEQB in
// expandAVX512_14, paired with inShuf1.
GLOBL expandAVX512_14_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
DATA  expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
DATA  expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
DATA  expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
DATA  expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
DATA  expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020

// expandAVX512_14_inShuf2: qwords 0-3 hold indices 0x04-0x08, qwords 4-5
// hold 0x05-0x09 and qwords 6-7 hold 0x05-0x08, each in the low bytes of the
// qword. The remaining index bytes are 0xff.
// NOTE(review): the code that loads this table is outside the visible
// chunk; presumably a VPERMB index vector for expandAVX512_14.
GLOBL expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
DATA  expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
DATA  expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
DATA  expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
DATA  expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
DATA  expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
DATA  expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
DATA  expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605

// expandAVX512_14_mat2: eight qwords whose bytes each have a single bit set.
// Qwords 0-3 equal qwords 2-5 of mat1 and qwords 4-7 equal qwords 0-3 of
// mat0.
// NOTE(review): the code that uses this table is outside the visible chunk;
// presumably these are 8x8 bit matrices for VGF2P8AFFINEQB in
// expandAVX512_14, paired with inShuf2.
GLOBL expandAVX512_14_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
DATA  expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
DATA  expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
DATA  expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
DATA  expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404

// expandAVX512_14_inShuf3 holds 64 byte indices. expandAVX512_14 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_14_mat3.
// Only the first four 8-byte entries carry indices other than 0xff.
GLOBL expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
DATA  expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
DATA  expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
DATA  expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
DATA  expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_14_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_14 applies to the bytes gathered with inShuf3. Each
// 8-byte entry is one 8x8 bit matrix; the last four entries are all-zero
// matrices, so with the $0 immediate those lanes come out as zero bytes.
GLOBL expandAVX512_14_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
DATA  expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
DATA  expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
DATA  expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
DATA  expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_14_outShufLo holds the VPERMI2B byte indices that
// expandAVX512_14 uses to build Z1: values 0x00-0x3f pick from Z0 and
// 0x40-0x7f pick from Z2.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_14_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
DATA  expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
DATA  expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
DATA  expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
DATA  expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
DATA  expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
DATA  expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
DATA  expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04

// expandAVX512_14_outShufHi0 holds the indices for the masked VPERMI2B.Z in
// expandAVX512_14 (0x00-0x3f pick from Z2, 0x40-0x7f pick from Z3). The 0xff
// entries sit at the byte positions cleared by the K1 mask
// 0xff0ffc3ff0ffc3ff used with that instruction.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_14_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
DATA  expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
DATA  expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
DATA  expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
DATA  expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
DATA  expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
DATA  expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
DATA  expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34

// expandAVX512_14_outShufHi1 holds the indices for the masked VPERMB.Z over
// Z4 in expandAVX512_14. The non-0xff entries sit exactly at the byte
// positions enabled by the K1 mask 0xf003c00f003c00 used with that
// instruction; every other position is zeroed by the mask.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_14_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
DATA  expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
DATA  expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
DATA  expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
DATA  expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
DATA  expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_14 reads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2.
//
// AX is read before it is written, so the caller must supply the pointer in
// AX. The routine overwrites AX, K1 and Z0-Z7. Frame and argument sizes are
// both zero ($0-0).
// NOTE(review): Z1/Z2 look like the low and high halves of the result —
// confirm against the caller.
TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
	// Load the gather index tables and the output shuffle tables.
	VMOVDQU64 expandAVX512_14_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_14_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_14_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_14_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_14_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_14_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_14_outShufHi1(SB), Z6
	// Z7 = the 64 input bytes.
	VMOVDQU64 (AX), Z7
	// Four gather + bit-matrix stages: VPERMB picks input bytes by index,
	// VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices (constant term 0).
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_14_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_14_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_14_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_14_mat3<>(SB), Z4, Z4
	// Z1 = bytes selected from Z0/Z2 by the outShufLo indices.
	VPERMI2B Z2, Z0, Z1
	// Z5 = bytes selected from Z2/Z3 by outShufHi0, zeroed where K1 is clear.
	MOVQ $0xff0ffc3ff0ffc3ff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	// Z0 = bytes selected from Z4 by outShufHi1 under the complementary mask.
	MOVQ $0xf003c00f003c00, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	// The two masks are bitwise complements, so OR merges disjoint bytes.
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_16_inShuf0 holds 64 byte indices. expandAVX512_16 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX. Every 8-byte entry selects input bytes 0-3, each twice.
GLOBL expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
DATA  expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000

// expandAVX512_16_mat0 is the 64-byte matrix operand of both
// VGF2P8AFFINEQB instructions in expandAVX512_16 (it is loaded once into
// Z1). Each 8-byte entry is one 8x8 bit matrix; entry k repeats the byte
// 1<<k eight times.
GLOBL expandAVX512_16_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_16_inShuf1 holds 64 byte indices. expandAVX512_16 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX. Every 8-byte entry selects input bytes 4-7, each twice.
GLOBL expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
DATA  expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404

// expandAVX512_16_outShufLo holds the VPERMB byte indices that
// expandAVX512_16 applies to both intermediate vectors (Z0 and Z2) to
// produce its two outputs; all indices are in the range 0x00-0x3f.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_16_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
DATA  expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
DATA  expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
DATA  expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
DATA  expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
DATA  expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
DATA  expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
DATA  expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726

// expandAVX512_16 reads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2.
//
// AX is read but never written here, so the caller must supply the pointer
// in AX. The routine overwrites Z0-Z4. Frame and argument sizes are both
// zero ($0-0).
// NOTE(review): Z1/Z2 look like the low and high halves of the result —
// confirm against the caller.
TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
	// Load the constant tables; the single matrix table in Z1 serves both halves.
	VMOVDQU64 expandAVX512_16_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_16_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_16_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_16_outShufLo(SB), Z3
	// Z4 = the 64 input bytes.
	VMOVDQU64 (AX), Z4
	// Gather input bytes by index, then apply the per-lane 8x8 bit matrices.
	VPERMB Z4, Z0, Z0
	VGF2P8AFFINEQB $0, Z1, Z0, Z0
	VPERMB Z4, Z2, Z2
	VGF2P8AFFINEQB $0, Z1, Z2, Z2
	// Reorder both intermediates with the same index table. Z1 (the matrix)
	// is dead after the last affine transform, so it is reused as an output.
	VPERMB Z0, Z3, Z1
	VPERMB Z2, Z3, Z2
	RET

// expandAVX512_18_inShuf0 holds 64 byte indices. expandAVX512_18 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_18_mat0.
GLOBL expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
DATA  expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
DATA  expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
DATA  expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
DATA  expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
DATA  expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
DATA  expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
DATA  expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000

// expandAVX512_18_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_18 applies to the bytes gathered with inShuf0. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_18_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
DATA  expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
DATA  expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
DATA  expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
DATA  expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
DATA  expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010

// expandAVX512_18_inShuf1 holds 64 byte indices. expandAVX512_18 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_18_mat1.
GLOBL expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
DATA  expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
DATA  expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
DATA  expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
DATA  expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
DATA  expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
DATA  expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
DATA  expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403

// expandAVX512_18_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_18 applies to the bytes gathered with inShuf1. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_18_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
DATA  expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
DATA  expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
DATA  expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
DATA  expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020

// expandAVX512_18_inShuf2 holds 64 byte indices. expandAVX512_18 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_18_mat2.
GLOBL expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
DATA  expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
DATA  expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
DATA  expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
DATA  expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
DATA  expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
DATA  expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
DATA  expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504

// expandAVX512_18_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_18 applies to the bytes gathered with inShuf2. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_18_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
DATA  expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
DATA  expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
DATA  expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
DATA  expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
DATA  expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
DATA  expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202

// expandAVX512_18_inShuf3 holds 64 byte indices. expandAVX512_18 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_18_mat3.
// Only the first four 8-byte entries carry indices other than 0xff.
GLOBL expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
DATA  expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
DATA  expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
DATA  expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
DATA  expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_18_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_18 applies to the bytes gathered with inShuf3. Each
// 8-byte entry is one 8x8 bit matrix; the last four entries are all-zero
// matrices, so with the $0 immediate those lanes come out as zero bytes.
GLOBL expandAVX512_18_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
DATA  expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
DATA  expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
DATA  expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_18_outShufLo holds the VPERMI2B byte indices that
// expandAVX512_18 uses to build Z1: values 0x00-0x3f pick from Z0 and
// 0x40-0x7f pick from Z2.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_18_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
DATA  expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
DATA  expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
DATA  expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
DATA  expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
DATA  expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
DATA  expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
DATA  expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b

// expandAVX512_18_outShufHi0 holds the indices for the masked VPERMI2B.Z in
// expandAVX512_18 (0x00-0x3f pick from Z2, 0x40-0x7f pick from Z3). The 0xff
// entries sit at the byte positions cleared by the K1 mask
// 0xffe0fff83ffe0fff used with that instruction.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_18_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
DATA  expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
DATA  expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
DATA  expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
DATA  expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
DATA  expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
DATA  expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
DATA  expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43

// expandAVX512_18_outShufHi1 holds the indices for the masked VPERMB.Z over
// Z4 in expandAVX512_18. The non-0xff entries sit exactly at the byte
// positions enabled by the K1 mask 0x1f0007c001f000 used with that
// instruction; every other position is zeroed by the mask.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_18_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
DATA  expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
DATA  expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
DATA  expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
DATA  expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
DATA  expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_18 reads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2.
//
// AX is read before it is written, so the caller must supply the pointer in
// AX. The routine overwrites AX, K1 and Z0-Z7. Frame and argument sizes are
// both zero ($0-0).
// NOTE(review): Z1/Z2 look like the low and high halves of the result —
// confirm against the caller.
TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
	// Load the gather index tables and the output shuffle tables.
	VMOVDQU64 expandAVX512_18_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_18_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_18_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_18_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_18_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_18_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_18_outShufHi1(SB), Z6
	// Z7 = the 64 input bytes.
	VMOVDQU64 (AX), Z7
	// Four gather + bit-matrix stages: VPERMB picks input bytes by index,
	// VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices (constant term 0).
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_18_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_18_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_18_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_18_mat3<>(SB), Z4, Z4
	// Z1 = bytes selected from Z0/Z2 by the outShufLo indices.
	VPERMI2B Z2, Z0, Z1
	// Z5 = bytes selected from Z2/Z3 by outShufHi0, zeroed where K1 is clear.
	MOVQ $0xffe0fff83ffe0fff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	// Z0 = bytes selected from Z4 by outShufHi1 under the complementary mask.
	MOVQ $0x1f0007c001f000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	// The two masks are bitwise complements, so OR merges disjoint bytes.
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_20_inShuf0 holds 64 byte indices. expandAVX512_20 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_20_mat0.
GLOBL expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
DATA  expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
DATA  expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
DATA  expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
DATA  expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
DATA  expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
DATA  expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
DATA  expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100

// expandAVX512_20_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_20 applies to the bytes gathered with inShuf0. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_20_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020

// expandAVX512_20_inShuf1 holds 64 byte indices. expandAVX512_20 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_20_mat1.
GLOBL expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
DATA  expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
DATA  expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
DATA  expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
DATA  expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
DATA  expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
DATA  expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
DATA  expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303

// expandAVX512_20_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_20 applies to the bytes gathered with inShuf1. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_20_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
DATA  expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
DATA  expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
DATA  expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808

// expandAVX512_20_inShuf2 holds 64 byte indices. expandAVX512_20 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_20_mat2.
GLOBL expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
DATA  expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
DATA  expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
DATA  expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
DATA  expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
DATA  expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
DATA  expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
DATA  expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504

// expandAVX512_20_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_20 applies to the bytes gathered with inShuf2. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_20_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
DATA  expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
DATA  expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
DATA  expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
DATA  expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
DATA  expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202

// expandAVX512_20_outShufLo holds the VPERMI2B byte indices that
// expandAVX512_20 uses to build Z1: values 0x00-0x3f pick from Z0 and
// 0x40-0x7f pick from Z3.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_20_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
DATA  expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
DATA  expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
DATA  expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
DATA  expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
DATA  expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
DATA  expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
DATA  expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d

// expandAVX512_20_outShufHi holds the VPERMI2B byte indices that
// expandAVX512_20 uses to build Z2: values 0x00-0x3f pick from Z3 and
// 0x40-0x7f pick from Z4.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_20_outShufHi(SB), RODATA, $0x40
DATA  expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
DATA  expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
DATA  expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
DATA  expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
DATA  expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
DATA  expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
DATA  expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
DATA  expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574

// expandAVX512_20 reads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2.
//
// AX is read but never written here, so the caller must supply the pointer
// in AX. The routine overwrites Z0-Z5. Frame and argument sizes are both
// zero ($0-0).
// NOTE(review): Z1/Z2 look like the low and high halves of the result —
// confirm against the caller.
TEXT expandAVX512_20<>(SB), NOSPLIT, $0-0
	// Load the gather index tables and the two output shuffle tables.
	VMOVDQU64 expandAVX512_20_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_20_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_20_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_20_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_20_outShufHi(SB), Z2
	// Z5 = the 64 input bytes.
	VMOVDQU64 (AX), Z5
	// Three gather + bit-matrix stages: VPERMB picks input bytes by index,
	// VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices (constant term 0).
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_20_mat0<>(SB), Z0, Z0
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_20_mat1<>(SB), Z3, Z3
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_20_mat2<>(SB), Z4, Z4
	// Z1 = bytes selected from Z0/Z3; Z2 = bytes selected from Z3/Z4.
	VPERMI2B Z3, Z0, Z1
	VPERMI2B Z4, Z3, Z2
	RET

// expandAVX512_22_inShuf0 holds 64 byte indices. expandAVX512_22 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_22_mat0.
GLOBL expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
DATA  expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
DATA  expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
DATA  expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
DATA  expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
DATA  expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
DATA  expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
DATA  expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000

// expandAVX512_22_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_22 applies to the bytes gathered with inShuf0. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_22_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
DATA  expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
DATA  expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
DATA  expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
DATA  expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
DATA  expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010

// expandAVX512_22_inShuf1 holds 64 byte indices. expandAVX512_22 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_22_mat1.
GLOBL expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
DATA  expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
DATA  expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
DATA  expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
DATA  expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
DATA  expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
DATA  expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
DATA  expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303

// expandAVX512_22_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_22 applies to the bytes gathered with inShuf1. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_22_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
DATA  expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
DATA  expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
DATA  expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
DATA  expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
DATA  expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101

// expandAVX512_22_inShuf2 holds 64 byte indices. expandAVX512_22 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_22_mat2.
GLOBL expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
DATA  expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
DATA  expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
DATA  expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
DATA  expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
DATA  expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
DATA  expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
DATA  expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403

// expandAVX512_22_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_22 applies to the bytes gathered with inShuf2. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane.
GLOBL expandAVX512_22_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
DATA  expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
DATA  expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
DATA  expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020

// expandAVX512_22_inShuf3 holds 64 byte indices. expandAVX512_22 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_22_mat3.
// Only the first four 8-byte entries carry indices other than 0xff.
GLOBL expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
DATA  expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
DATA  expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
DATA  expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
DATA  expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_22_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_22 applies to the bytes gathered with inShuf3. Each
// 8-byte entry is one 8x8 bit matrix; the last four entries are all-zero
// matrices, so with the $0 immediate those lanes come out as zero bytes.
GLOBL expandAVX512_22_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
DATA  expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
DATA  expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
DATA  expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_22_outShufLo holds the VPERMI2B byte indices that
// expandAVX512_22 uses to build Z1: values 0x00-0x3f pick from Z0 and
// 0x40-0x7f pick from Z2.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_22_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
DATA  expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
DATA  expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
DATA  expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
DATA  expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
DATA  expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
DATA  expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
DATA  expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d

// expandAVX512_22_outShufHi0 holds the indices for the masked VPERMI2B.Z in
// expandAVX512_22 (0x00-0x3f pick from Z2, 0x40-0x7f pick from Z3). The 0xff
// entries sit at the byte positions cleared by the K1 mask
// 0xffff03fffc0ffff used with that instruction.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_22_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
DATA  expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
DATA  expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
DATA  expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
DATA  expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
DATA  expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
DATA  expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
DATA  expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d

// expandAVX512_22_outShufHi1 holds the indices for the masked VPERMB.Z over
// Z4 in expandAVX512_22. The non-0xff entries sit exactly at the byte
// positions enabled by the K1 mask 0xf0000fc0003f0000 used with that
// instruction; every other position is zeroed by the mask.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_22_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
DATA  expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
DATA  expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
DATA  expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff

// expandAVX512_22 reads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2.
//
// AX is read before it is written, so the caller must supply the pointer in
// AX. The routine overwrites AX, K1 and Z0-Z7. Frame and argument sizes are
// both zero ($0-0).
// NOTE(review): Z1/Z2 look like the low and high halves of the result —
// confirm against the caller.
TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0
	// Load the gather index tables and the output shuffle tables.
	VMOVDQU64 expandAVX512_22_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_22_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_22_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_22_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_22_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_22_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_22_outShufHi1(SB), Z6
	// Z7 = the 64 input bytes.
	VMOVDQU64 (AX), Z7
	// Four gather + bit-matrix stages: VPERMB picks input bytes by index,
	// VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices (constant term 0).
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_22_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_22_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_22_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_22_mat3<>(SB), Z4, Z4
	// Z1 = bytes selected from Z0/Z2 by the outShufLo indices.
	VPERMI2B Z2, Z0, Z1
	// Z5 = bytes selected from Z2/Z3 by outShufHi0, zeroed where K1 is clear.
	MOVQ $0xffff03fffc0ffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	// Z0 = bytes selected from Z4 by outShufHi1 under the complementary mask.
	MOVQ $0xf0000fc0003f0000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	// The two masks are bitwise complements, so OR merges disjoint bytes.
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_24_inShuf0 holds 64 byte indices. expandAVX512_24 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_24_mat0.
GLOBL expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
DATA  expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
DATA  expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
DATA  expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
DATA  expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
DATA  expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
DATA  expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
DATA  expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000

// expandAVX512_24_mat0 is the 64-byte matrix operand that expandAVX512_24
// loads into Z2 and uses for the first two VGF2P8AFFINEQB stages (the bytes
// gathered with inShuf0 and with inShuf1). Each 8-byte entry is one 8x8 bit
// matrix; entry k repeats the byte 1<<k eight times.
GLOBL expandAVX512_24_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_24_inShuf1 holds 64 byte indices. expandAVX512_24 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_24_mat0
// (this size has no separate mat1 table).
GLOBL expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
DATA  expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
DATA  expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202

// expandAVX512_24_inShuf2 holds 64 byte indices. expandAVX512_24 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_24_mat2.
GLOBL expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
DATA  expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
DATA  expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
DATA  expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
DATA  expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
DATA  expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
DATA  expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
DATA  expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05

// expandAVX512_24_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_24 applies to the bytes gathered with inShuf2. Each
// 8-byte entry is one 8x8 bit matrix for the matching 64-bit lane. Note the
// sequence has no 0x20 entry and ends with a second 0x01 entry.
GLOBL expandAVX512_24_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040
DATA  expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
DATA  expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101

// expandAVX512_24_inShuf3 holds 64 byte indices. expandAVX512_24 loads it
// and uses it as the VPERMB index vector over the 64 input bytes read
// through AX; the result is then transformed with expandAVX512_24_mat3.
// Byte 0 (index 0x05) is the only entry that is not 0xff.
GLOBL expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
DATA  expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_24_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// that expandAVX512_24 applies to the bytes gathered with inShuf3. Each
// 8-byte entry is one 8x8 bit matrix; only the first entry is non-zero, so
// with the $0 immediate all other lanes come out as zero bytes.
GLOBL expandAVX512_24_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
DATA  expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
DATA  expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
DATA  expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
DATA  expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_24_outShufLo holds the VPERMI2B byte indices that
// expandAVX512_24 uses to build Z1: values 0x00-0x3f pick from Z0 and
// 0x40-0x7f pick from Z2.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_24_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
DATA  expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
DATA  expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
DATA  expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
DATA  expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
DATA  expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
DATA  expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
DATA  expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50

// expandAVX512_24_outShufHi0 holds the indices for the masked VPERMI2B.Z in
// expandAVX512_24 (0x00-0x3f pick from Z2, 0x40-0x7f pick from Z3). Its
// single 0xff entry (byte 61) is the one position cleared by the K1 mask
// 0xdfffffffffffffff used with that instruction.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_24_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
DATA  expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
DATA  expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
DATA  expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
DATA  expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
DATA  expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
DATA  expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
DATA  expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746

// expandAVX512_24_outShufHi1 holds the indices for the masked VPERMB.Z over
// Z4 in expandAVX512_24. Its single non-0xff entry (byte 61, index 0x00) is
// the one position enabled by the K1 mask 0x2000000000000000 used with that
// instruction; every other position is zeroed by the mask.
// NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is
// not file-local.
GLOBL expandAVX512_24_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff

// expandAVX512_24 reads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2.
//
// AX is read before it is written, so the caller must supply the pointer in
// AX. The routine overwrites AX, K1 and Z0-Z8. Frame and argument sizes are
// both zero ($0-0).
// NOTE(review): Z1/Z2 look like the low and high halves of the result —
// confirm against the caller.
TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0
	// Load the gather index tables, the shared matrix (Z2) and the output
	// shuffle tables.
	VMOVDQU64 expandAVX512_24_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_24_mat0<>(SB), Z2
	VMOVDQU64 expandAVX512_24_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_24_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_24_inShuf3<>(SB), Z5
	VMOVDQU64 expandAVX512_24_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_24_outShufHi0(SB), Z6
	VMOVDQU64 expandAVX512_24_outShufHi1(SB), Z7
	// Z8 = the 64 input bytes.
	VMOVDQU64 (AX), Z8
	// Stage 0: gather with inShuf0, transform with mat0 (Z2) -> Z0.
	VPERMB Z8, Z0, Z0
	VGF2P8AFFINEQB $0, Z2, Z0, Z0
	// Stage 1: gather with inShuf1, transform with mat0 again. The result
	// overwrites Z2, so mat0 is no longer available after this point.
	VPERMB Z8, Z3, Z3
	VGF2P8AFFINEQB $0, Z2, Z3, Z2
	// Stage 2: gather with inShuf2 (Z4), transform with mat2 -> Z3.
	VPERMB Z8, Z4, Z3
	VGF2P8AFFINEQB $0, expandAVX512_24_mat2<>(SB), Z3, Z3
	// Stage 3: gather with inShuf3 (Z5), transform with mat3 -> Z4.
	VPERMB Z8, Z5, Z4
	VGF2P8AFFINEQB $0, expandAVX512_24_mat3<>(SB), Z4, Z4
	// Z1 = bytes selected from Z0/Z2 by the outShufLo indices.
	VPERMI2B Z2, Z0, Z1
	// Z6 = bytes selected from Z2/Z3 by outShufHi0; only byte 61 is zeroed.
	MOVQ $0xdfffffffffffffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z6
	// Z0 = byte 61 selected from Z4 by outShufHi1; all other bytes zeroed.
	MOVQ $0x2000000000000000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z7, K1, Z0
	// The two masks are bitwise complements, so OR merges disjoint bytes.
	VPORQ Z0, Z6, Z2
	RET

// expandAVX512_26_inShuf0 is a 64-byte read-only table of byte indices.
// NOTE(review): presumably loaded by expandAVX512_26 as a VPERMB index
// vector, as the other expanders in this file do with their inShuf tables —
// confirm against that routine.
GLOBL expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
DATA  expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
DATA  expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
DATA  expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
DATA  expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
DATA  expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
DATA  expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
DATA  expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000

GLOBL expandAVX512_26_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
DATA  expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
DATA  expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
DATA  expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
DATA  expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
DATA  expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010

GLOBL expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
DATA  expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
DATA  expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
DATA  expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302

GLOBL expandAVX512_26_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
DATA  expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
DATA  expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
DATA  expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
DATA  expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
DATA  expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808

GLOBL expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
DATA  expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
DATA  expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
DATA  expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
DATA  expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
DATA  expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
DATA  expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
DATA  expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303

GLOBL expandAVX512_26_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
DATA  expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
DATA  expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
DATA  expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
DATA  expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
DATA  expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
DATA  expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
DATA  expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101

GLOBL expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
DATA  expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
DATA  expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
DATA  expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
DATA  expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
DATA  expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
DATA  expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

GLOBL expandAVX512_26_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
DATA  expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
DATA  expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
DATA  expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
DATA  expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000

GLOBL expandAVX512_26_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
DATA  expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
DATA  expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
DATA  expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
DATA  expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
DATA  expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
DATA  expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
DATA  expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514

GLOBL expandAVX512_26_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
DATA  expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
DATA  expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
DATA  expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
DATA  expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
DATA  expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
DATA  expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
DATA  expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a

GLOBL expandAVX512_26_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
DATA  expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
DATA  expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
DATA  expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_26 reads 64 bytes at (AX) and leaves two 64-byte vectors in
// Z1 and Z2. Going by its tables, every input bit is replicated into 26
// consecutive output bits, with Z1 holding output bytes 0-63 and Z2 bytes
// 64-127; only input bytes 0-4 are referenced.
//
// Overwrites Z0-Z7, K1 and AX (AX is reused as scratch for the mask
// constants, so the input pointer does not survive). Uses no stack.
TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0
	// Load the four input shuffles, the three output shuffles and the
	// 64 input bytes.
	VMOVDQU64 expandAVX512_26_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_26_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_26_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_26_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_26_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_26_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_26_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	// Four permute + affine stages: VPERMB picks input bytes by index, then
	// VGF2P8AFFINEQB passes each byte through the 8x8 bit matrix of its
	// quadword. Stage results end up in Z0, Z2, Z3 and Z4.
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_26_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_26_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_26_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_26_mat3<>(SB), Z4, Z4
	// Z1 = bytes gathered from Z0 (indices 0x00-0x3f) and Z2 (0x40-0x7f).
	VPERMI2B Z2, Z0, Z1
	// The second vector is built in two zero-masked pieces whose K1 masks
	// are exact complements: first the bytes that come from Z2/Z3 ...
	MOVQ $0xff7c07ffff01ffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	// ... then the remaining bytes, which come from the fourth stage in Z4.
	MOVQ $0x83f80000fe0000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	// Merge the two disjoint pieces into Z2.
	VPORQ Z0, Z5, Z2
	RET

// Constant tables for expandAVX512_28. Each table is 64 bytes (one ZMM
// register) written as eight little-endian quadwords.
//
//   inShufN   VPERMB byte indices into the 64 input bytes. Only input bytes
//             0-4 are selected. 0xff marks a position whose value is never
//             picked up by the output shuffles.
//   matN      VGF2P8AFFINEQB matrices, one 8x8 bit matrix per quadword. Each
//             matrix byte has at most one bit set, so every result bit is a
//             copy of one input bit. The most significant byte of a quadword
//             produces result bit 0, e.g. 0x0101010102020202 copies input
//             bit 0 into result bits 0-3 and input bit 1 into result bits 4-7.
//   outShuf*  byte indices that gather the affine results into output order.

// Stage 0 input shuffle: input bytes 0-2.
GLOBL expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
DATA  expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
DATA  expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
DATA  expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
DATA  expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
DATA  expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
DATA  expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100

// Stage 0 matrices: bits 0, 0/1, 1, 2, 2/3, 3, 4, 4/5. The "x/y" quadwords
// serve result bytes that straddle two neighbouring input bits.
GLOBL expandAVX512_28_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020

// Stage 1 input shuffle: input bytes 0-1 in quadwords 0-3, byte 2 in
// quadwords 4-5, bytes 2-4 in quadwords 6-7.
GLOBL expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
DATA  expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
DATA  expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
DATA  expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
DATA  expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302

// Stage 1 matrices: bits 5, 6, 6/7, 7, then bits 0, 1, 2, 2/3.
GLOBL expandAVX512_28_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
DATA  expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
DATA  expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808

// Stage 2 input shuffle: input bytes 2-4 in quadwords 0-1, bytes 2-3 in
// quadwords 2-6, bytes 3-4 in quadword 7.
GLOBL expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
DATA  expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
DATA  expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
DATA  expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
DATA  expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
DATA  expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303

// Stage 2 matrices: bits 3, 4, 4/5, 5, 6, 6/7, 7, then bit 0.
GLOBL expandAVX512_28_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
DATA  expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
DATA  expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
DATA  expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
DATA  expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
DATA  expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
DATA  expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
DATA  expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101

// Stage 3 input shuffle: input bytes 3-4 in quadwords 0-1, byte 4 in
// quadword 2. Quadwords 3-7 are unused.
GLOBL expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
DATA  expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
DATA  expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
DATA  expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// Stage 3 matrices: bits 0/1, 1, then bit 3. The all-zero quadwords pair
// with the unused shuffle lanes above.
GLOBL expandAVX512_28_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
DATA  expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
DATA  expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
DATA  expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000

// VPERMI2B indices for the first output vector: 0x00-0x3f select from the
// stage 0 result, 0x40-0x7f from the stage 1 result.
GLOBL expandAVX512_28_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
DATA  expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
DATA  expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
DATA  expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
DATA  expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
DATA  expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
DATA  expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
DATA  expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706

// VPERMI2B indices for the second output vector: 0x00-0x3f select from the
// stage 1 result, 0x40-0x7f from the stage 2 result. The 0xff bytes are the
// positions supplied by outShufHi1 instead; they match the zero bits of the
// K1 mask 0xdf87fffff87fffff used in expandAVX512_28.
GLOBL expandAVX512_28_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
DATA  expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
DATA  expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
DATA  expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
DATA  expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
DATA  expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
DATA  expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
DATA  expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736

// VPERMB indices into the stage 3 result for the remaining bytes of the
// second output vector. Live bytes match the set bits of the K1 mask
// 0x2078000007800000 used in expandAVX512_28.
GLOBL expandAVX512_28_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
DATA  expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
DATA  expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
DATA  expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff

// expandAVX512_28 reads 64 bytes at (AX) and leaves two 64-byte vectors in
// Z1 and Z2. Going by its tables, every input bit is replicated into 28
// consecutive output bits, with Z1 holding output bytes 0-63 and Z2 bytes
// 64-127; only input bytes 0-4 are referenced.
//
// Overwrites Z0-Z7, K1 and AX (AX is reused as scratch for the mask
// constants, so the input pointer does not survive). Uses no stack.
TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0
	// Load the four input shuffles, the three output shuffles and the
	// 64 input bytes.
	VMOVDQU64 expandAVX512_28_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_28_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_28_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_28_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_28_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_28_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_28_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	// Four permute + affine stages: VPERMB picks input bytes by index, then
	// VGF2P8AFFINEQB passes each byte through the 8x8 bit matrix of its
	// quadword. Stage results end up in Z0, Z2, Z3 and Z4.
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_28_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_28_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_28_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_28_mat3<>(SB), Z4, Z4
	// Z1 = bytes gathered from Z0 (indices 0x00-0x3f) and Z2 (0x40-0x7f).
	VPERMI2B Z2, Z0, Z1
	// The second vector is built in two zero-masked pieces whose K1 masks
	// are exact complements: first the bytes that come from Z2/Z3 ...
	MOVQ $0xdf87fffff87fffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	// ... then the remaining bytes, which come from the fourth stage in Z4.
	MOVQ $0x2078000007800000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	// Merge the two disjoint pieces into Z2.
	VPORQ Z0, Z5, Z2
	RET

// Constant tables for expandAVX512_30. Each table is 64 bytes (one ZMM
// register) written as eight little-endian quadwords.
//
//   inShufN   VPERMB byte indices into the 64 input bytes. Only input bytes
//             0-4 are selected. 0xff marks a position whose value is never
//             picked up by the output shuffles.
//   matN      VGF2P8AFFINEQB matrices, one 8x8 bit matrix per quadword. Each
//             matrix byte has at most one bit set, so every result bit is a
//             copy of one input bit. The most significant byte of a quadword
//             produces result bit 0, e.g. 0x0101010101010202 copies input
//             bit 0 into result bits 0-5 and input bit 1 into result bits 6-7.
//   outShuf*  byte indices that gather the affine results into output order.

// Stage 0 input shuffle: input bytes 0-2 in quadwords 0-1, bytes 0-1 after.
GLOBL expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
DATA  expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
DATA  expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
DATA  expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
DATA  expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
DATA  expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000

// Stage 0 matrices: bits 0, 0/1, 1, 1/2, 2, 2/3, 3, 4. The "x/y" quadwords
// serve result bytes that straddle two neighbouring input bits.
GLOBL expandAVX512_30_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
DATA  expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
DATA  expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
DATA  expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
DATA  expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
DATA  expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010

// Stage 1 input shuffle: input bytes 0-1 in quadwords 0-5, byte 2 in
// quadword 6, bytes 2-4 in quadword 7.
GLOBL expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
DATA  expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
DATA  expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
DATA  expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202

// Stage 1 matrices: bits 4/5, 5, 5/6, 6, 6/7, 7, then bits 0 and 1.
GLOBL expandAVX512_30_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
DATA  expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
DATA  expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
DATA  expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
DATA  expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
DATA  expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202

// Stage 2 input shuffle: input bytes 2-4 in quadword 0, bytes 2-3 after.
GLOBL expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
DATA  expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
DATA  expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
DATA  expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
DATA  expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
DATA  expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302

// Stage 2 matrices: bits 1/2, 2, 2/3, 3, 4, 4/5, 5, 5/6.
GLOBL expandAVX512_30_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
DATA  expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
DATA  expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
DATA  expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
DATA  expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
DATA  expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040

// Stage 3 input shuffle: input bytes 2-3 in quadwords 0-2, bytes 3-4 in
// quadwords 3-4, byte 4 in quadword 5. Quadwords 6-7 are unused.
GLOBL expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
DATA  expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
DATA  expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
DATA  expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
DATA  expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
DATA  expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// Stage 3 matrices: bits 6, 6/7, 7, then bits 0, 0/1, 1. The all-zero
// quadwords pair with the unused shuffle lanes above.
GLOBL expandAVX512_30_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
DATA  expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
DATA  expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
DATA  expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
DATA  expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
DATA  expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
DATA  expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000

// VPERMI2B indices for the first output vector: 0x00-0x3f select from the
// stage 0 result, 0x40-0x7f from the stage 1 result.
GLOBL expandAVX512_30_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
DATA  expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
DATA  expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
DATA  expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
DATA  expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
DATA  expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
DATA  expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
DATA  expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61

// VPERMI2B indices for the second output vector: 0x00-0x3f select from the
// stage 1 result, 0x40-0x7f from the stage 2 result. The 0xff bytes are the
// positions supplied by outShufHi1 instead; they match the zero bits of the
// K1 mask 0xb001ffffc007ffff used in expandAVX512_30.
GLOBL expandAVX512_30_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
DATA  expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
DATA  expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
DATA  expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
DATA  expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
DATA  expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
DATA  expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
DATA  expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff

// VPERMB indices into the stage 3 result for the remaining bytes of the
// second output vector. Live bytes match the set bits of the K1 mask
// 0x4ffe00003ff80000 used in expandAVX512_30.
GLOBL expandAVX512_30_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
DATA  expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
DATA  expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
DATA  expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b

// expandAVX512_30 reads 64 bytes at (AX) and leaves two 64-byte vectors in
// Z1 and Z2. Going by its tables, every input bit is replicated into 30
// consecutive output bits, with Z1 holding output bytes 0-63 and Z2 bytes
// 64-127; only input bytes 0-4 are referenced.
//
// Overwrites Z0-Z7, K1 and AX (AX is reused as scratch for the mask
// constants, so the input pointer does not survive). Uses no stack.
TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
	// Load the four input shuffles, the three output shuffles and the
	// 64 input bytes.
	VMOVDQU64 expandAVX512_30_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_30_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_30_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_30_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_30_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_30_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_30_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	// Four permute + affine stages: VPERMB picks input bytes by index, then
	// VGF2P8AFFINEQB passes each byte through the 8x8 bit matrix of its
	// quadword. Stage results end up in Z0, Z2, Z3 and Z4.
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_30_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_30_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_30_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_30_mat3<>(SB), Z4, Z4
	// Z1 = bytes gathered from Z0 (indices 0x00-0x3f) and Z2 (0x40-0x7f).
	VPERMI2B Z2, Z0, Z1
	// The second vector is built in two zero-masked pieces whose K1 masks
	// are exact complements: first the bytes that come from Z2/Z3 ...
	MOVQ $0xb001ffffc007ffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	// ... then the remaining bytes, which come from the fourth stage in Z4.
	MOVQ $0x4ffe00003ff80000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	// Merge the two disjoint pieces into Z2.
	VPORQ Z0, Z5, Z2
	RET

// Constant tables for expandAVX512_32. Each table is 64 bytes (one ZMM
// register) written as eight little-endian quadwords.

// Stage 0 input shuffle: every quadword holds input byte 0 four times
// followed by input byte 1 four times.
GLOBL expandAVX512_32_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
DATA  expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000

// VGF2P8AFFINEQB matrices shared by both stages. Quadword k has the single
// bit 1<<k in all eight rows, so each result byte in quadword k is 0xff when
// bit k of the shuffled input byte is set and 0x00 otherwise.
GLOBL expandAVX512_32_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080

// Stage 1 input shuffle: same layout as inShuf0 but for input bytes 2 and 3.
GLOBL expandAVX512_32_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
DATA  expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202

// VPERMB indices applied to each stage result separately. The first 32
// bytes collect the low four bytes of quadwords 0-7 (bits 0-7 of the first
// input byte of the pair), the last 32 bytes collect the high four bytes
// (bits 0-7 of the second input byte).
GLOBL expandAVX512_32_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
DATA  expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
DATA  expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
DATA  expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
DATA  expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
DATA  expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
DATA  expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
DATA  expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534

// expandAVX512_32 reads 64 bytes at (AX) and leaves two 64-byte vectors in
// Z1 and Z2. Going by its tables, every bit of input bytes 0-1 becomes four
// identical bytes (32 bits) in Z1, and every bit of input bytes 2-3 likewise
// in Z2.
//
// Overwrites Z0-Z4. AX is not modified. Uses no stack.
TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
	// One matrix (Z1) and one output shuffle (Z3) serve both halves.
	VMOVDQU64 expandAVX512_32_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_32_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_32_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_32_outShufLo(SB), Z3
	VMOVDQU64 (AX), Z4
	// Stage 0 (input bytes 0-1) -> Z0, stage 1 (input bytes 2-3) -> Z2.
	VPERMB Z4, Z0, Z0
	VGF2P8AFFINEQB $0, Z1, Z0, Z0
	VPERMB Z4, Z2, Z2
	VGF2P8AFFINEQB $0, Z1, Z2, Z2
	// Reorder each stage result with the same index vector. Z1 (the matrix)
	// is dead by now and is reused as the first result register.
	VPERMB Z0, Z3, Z1
	VPERMB Z2, Z3, Z2
	RET

// Constant tables for expandAVX512_36. Each table is 64 bytes (one ZMM
// register) written as eight little-endian quadwords.
//
//   inShufN   VPERMB byte indices into the 64 input bytes. Only input bytes
//             0-3 are selected. 0xff marks a position whose value is never
//             picked up by the output shuffles.
//   matN      VGF2P8AFFINEQB matrices, one 8x8 bit matrix per quadword. Each
//             matrix byte has exactly one bit set, so every result bit is a
//             copy of one input bit. The most significant byte of a quadword
//             produces result bit 0, e.g. 0x0101010102020202 copies input
//             bit 0 into result bits 0-3 and input bit 1 into result bits 4-7.
//   outShuf*  VPERMI2B byte indices that gather the affine results into
//             output order.

// Stage 0 input shuffle: input bytes 0-1.
GLOBL expandAVX512_36_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
DATA  expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
DATA  expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
DATA  expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
DATA  expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
DATA  expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100

// Stage 0 matrices: bits 0, 0/1, 1, 2, 2/3, 3, 4, 4/5. The "x/y" quadwords
// serve result bytes that straddle two neighbouring input bits.
GLOBL expandAVX512_36_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020

// Stage 1 input shuffle: input bytes 0-1 in quadwords 0-3, bytes 1-2 in
// quadwords 4-6, bytes 2-3 in quadword 7.
GLOBL expandAVX512_36_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
DATA  expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
DATA  expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
DATA  expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
DATA  expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
DATA  expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
DATA  expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202

// Stage 1 matrices: bits 5, 6, 6/7, 7, then 6, 6/7, 7 again (for the later
// input bytes), then bit 0.
GLOBL expandAVX512_36_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
DATA  expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
DATA  expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
DATA  expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101

// Stage 2 input shuffle: input bytes 2-3 (quadwords 6-7: byte 2 only).
GLOBL expandAVX512_36_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
DATA  expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
DATA  expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
DATA  expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
DATA  expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
DATA  expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202

// Stage 2 matrices: bits 0/1, 1, 2, 2/3, 3, 4, 4/5, 5.
GLOBL expandAVX512_36_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
DATA  expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
DATA  expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
DATA  expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
DATA  expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
DATA  expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020

// VPERMI2B indices for the first output vector: 0x00-0x3f select from the
// stage 0 result, 0x40-0x7f from the stage 1 result.
GLOBL expandAVX512_36_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
DATA  expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
DATA  expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
DATA  expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
DATA  expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
DATA  expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
DATA  expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
DATA  expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736

// VPERMI2B indices for the second output vector: 0x00-0x3f select from the
// stage 1 result, 0x40-0x7f from the stage 2 result. Every byte is a live
// index, so no mask or third shuffle is needed.
GLOBL expandAVX512_36_outShufHi(SB), RODATA, $0x40
DATA  expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
DATA  expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
DATA  expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
DATA  expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
DATA  expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
DATA  expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
DATA  expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
DATA  expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957

// expandAVX512_36 reads 64 bytes at (AX) and leaves two 64-byte vectors in
// Z1 and Z2. Going by its tables, every input bit is replicated into 36
// consecutive output bits, with Z1 holding output bytes 0-63 and Z2 bytes
// 64-127; only input bytes 0-3 are referenced.
//
// Overwrites Z0-Z5. AX is not modified. Uses no stack.
TEXT expandAVX512_36<>(SB), NOSPLIT, $0-0
	// Load the three input shuffles, the two output shuffles and the
	// 64 input bytes.
	VMOVDQU64 expandAVX512_36_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_36_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_36_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_36_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_36_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z5
	// Three permute + affine stages: VPERMB picks input bytes by index, then
	// VGF2P8AFFINEQB passes each byte through the 8x8 bit matrix of its
	// quadword. Stage results end up in Z0, Z3 and Z4.
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_36_mat0<>(SB), Z0, Z0
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_36_mat1<>(SB), Z3, Z3
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_36_mat2<>(SB), Z4, Z4
	// Z1 gathers from Z0 (indices 0x00-0x3f) and Z3 (0x40-0x7f);
	// Z2 gathers from Z3 (0x00-0x3f) and Z4 (0x40-0x7f).
	VPERMI2B Z3, Z0, Z1
	VPERMI2B Z4, Z3, Z2
	RET

GLOBL expandAVX512_40_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
DATA  expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
DATA  expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
DATA  expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
DATA  expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
DATA  expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
DATA  expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
DATA  expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000

GLOBL expandAVX512_40_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080

GLOBL expandAVX512_40_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
DATA  expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
DATA  expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
DATA  expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101

GLOBL expandAVX512_40_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
DATA  expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
DATA  expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040

GLOBL expandAVX512_40_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
DATA  expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
DATA  expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
DATA  expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
DATA  expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
DATA  expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202

GLOBL expandAVX512_40_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
DATA  expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
DATA  expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
DATA  expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_40_inShuf3 is the VPERMB index table that expandAVX512_40 loads
// into Z4. The gathered bytes are transformed with expandAVX512_40_mat3, whose
// lanes 1-7 are zero matrices, so only the first 64-bit lane can yield
// non-zero bytes.
GLOBL expandAVX512_40_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
DATA  expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_40_mat3 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_40_inShuf3. Only lane 0 carries a
// non-zero matrix; the zero matrices in lanes 1-7 produce zero bytes.
GLOBL expandAVX512_40_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
DATA  expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
DATA  expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
DATA  expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_40_outShufLo is the VPERMI2B index table that expandAVX512_40
// loads into Z1. Indices 0x00-0x3f pick bytes from Z0 (the inShuf0/mat0
// result) and 0x40-0x7f pick bytes from Z2 (the inShuf1/mat1 result); the
// selected bytes overwrite Z1.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_40_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
DATA  expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
DATA  expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
DATA  expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
DATA  expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
DATA  expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
DATA  expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
DATA  expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e

// expandAVX512_40_outShufHi0 is the index table for the zero-masked VPERMI2B
// in expandAVX512_40 (loaded into Z5). Indices 0x00-0x3f pick from Z2 and
// 0x40-0x7f from Z3. The lanes cleared in the mask 0xe7ffffffffffffff
// (bytes 59 and 60) hold 0xff here and are filled from
// expandAVX512_40_outShufHi1 instead.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_40_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
DATA  expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
DATA  expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
DATA  expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
DATA  expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
DATA  expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
DATA  expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
DATA  expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d

// expandAVX512_40_outShufHi1 is the index table for the zero-masked VPERMB
// over Z4 in expandAVX512_40 (loaded into Z6). Only the lanes enabled by the
// mask 0x1800000000000000 (bytes 59 and 60) carry real indices; every other
// entry is 0xff and is zeroed by the mask.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_40_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff

// expandAVX512_40 reads 64 bytes from (AX) and leaves two 64-byte results in
// Z1 and Z2.
//
// Each inShuf table gathers input bytes with VPERMB and the matching mat table
// transforms them with VGF2P8AFFINEQB, giving four intermediate vectors
// (Z0, Z2, Z3, Z4). The outShuf tables then assemble the results: Z1 from
// intermediates 0 and 1, Z2 from intermediates 1, 2 and 3.
//
// In:       AX = address of the input (unaligned 64-byte load).
// Out:      Z1, Z2.
// Clobbers: AX, K1, Z0-Z7.
// NOTE(review): the name suggests each input bit is widened to 40 output
// bits; that contract is defined by the generator and is not visible here —
// confirm against mkasm.go.
TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
	// Load the shuffle-index tables and the input vector.
	VMOVDQU64 expandAVX512_40_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_40_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_40_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_40_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_40_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_40_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_40_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	// Gather input bytes, then apply the per-lane bit matrix, once per
	// inShuf/mat pair. Each index register is overwritten by its result.
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_40_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_40_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_40_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_40_mat3<>(SB), Z4, Z4
	// Z1 = bytes picked from Z0 (indices 0x00-0x3f) and Z2 (0x40-0x7f).
	VPERMI2B Z2, Z0, Z1
	// AX is reused as scratch for the lane masks; the input pointer is lost.
	MOVQ $0xe7ffffffffffffff, AX
	KMOVQ AX, K1
	// Z5 = bytes picked from Z2/Z3 in the lanes enabled by K1, zero elsewhere.
	VPERMI2B.Z Z3, Z2, K1, Z5
	// Complementary mask: exactly the lanes left zero above.
	MOVQ $0x1800000000000000, AX
	KMOVQ AX, K1
	// Z0 = bytes picked from Z4 in those remaining lanes, zero elsewhere.
	VPERMB.Z Z4, Z6, K1, Z0
	// Merge the two disjoint parts into the second result.
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_44_inShuf0 is a 64-entry byte-index table. expandAVX512_44
// loads it into Z0 and uses it as the VPERMB index vector over the input bytes
// read from (AX); the gathered bytes are then transformed with
// expandAVX512_44_mat0.
GLOBL expandAVX512_44_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
DATA  expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
DATA  expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
DATA  expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
DATA  expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
DATA  expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00

// expandAVX512_44_mat0 is the memory operand of the VGF2P8AFFINEQB (imm8 $0)
// that expandAVX512_44 applies to the bytes gathered by
// expandAVX512_44_inShuf0. It holds one 8-byte matrix per 64-bit lane.
GLOBL expandAVX512_44_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020

// expandAVX512_44_inShuf1 is the VPERMB index table that expandAVX512_44 loads
// into Z2. The bytes it gathers from the input are transformed with
// expandAVX512_44_mat1.
GLOBL expandAVX512_44_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
DATA  expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
DATA  expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
DATA  expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101

// expandAVX512_44_mat1 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_44_inShuf1; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_44_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
DATA  expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
DATA  expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808

// expandAVX512_44_inShuf2 is the VPERMB index table that expandAVX512_44 loads
// into Z3. The bytes it gathers from the input are transformed with
// expandAVX512_44_mat2.
GLOBL expandAVX512_44_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
DATA  expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
DATA  expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
DATA  expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
DATA  expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
DATA  expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
DATA  expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
DATA  expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02

// expandAVX512_44_mat2 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_44_inShuf2; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_44_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
DATA  expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
DATA  expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
DATA  expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
DATA  expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
DATA  expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
DATA  expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
DATA  expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202

// expandAVX512_44_inShuf3 is the VPERMB index table that expandAVX512_44 loads
// into Z4. The gathered bytes are transformed with expandAVX512_44_mat3, whose
// lanes 6-7 are zero matrices, so those two lanes always yield zero bytes.
GLOBL expandAVX512_44_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
DATA  expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
DATA  expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_44_mat3 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_44_inShuf3. Lanes 0-5 carry matrices;
// the zero matrices in lanes 6-7 produce zero bytes.
GLOBL expandAVX512_44_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
DATA  expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
DATA  expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
DATA  expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
DATA  expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
DATA  expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
DATA  expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_44_outShufLo is the VPERMI2B index table that expandAVX512_44
// loads into Z1. Indices 0x00-0x3f pick bytes from Z0 (the inShuf0/mat0
// result) and 0x40-0x7f pick bytes from Z2 (the inShuf1/mat1 result); the
// selected bytes overwrite Z1.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_44_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
DATA  expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
DATA  expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
DATA  expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
DATA  expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
DATA  expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
DATA  expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
DATA  expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e

// expandAVX512_44_outShufHi0 is the index table for the zero-masked VPERMI2B
// in expandAVX512_44 (loaded into Z5). Indices 0x00-0x3f pick from Z2 and
// 0x40-0x7f from Z3. The lanes cleared in the mask 0xce79fe003fffffff hold
// 0xff here and are filled from expandAVX512_44_outShufHi1 instead.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_44_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
DATA  expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
DATA  expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
DATA  expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
DATA  expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
DATA  expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
DATA  expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff

// expandAVX512_44_outShufHi1 is the index table for the zero-masked VPERMB
// over Z4 in expandAVX512_44 (loaded into Z6). Only the lanes enabled by the
// mask 0x318601ffc0000000 carry real indices; every other entry is 0xff and
// is zeroed by the mask.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_44_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
DATA  expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
DATA  expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
DATA  expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
DATA  expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21

// expandAVX512_44 reads 64 bytes from (AX) and leaves two 64-byte results in
// Z1 and Z2.
//
// Each inShuf table gathers input bytes with VPERMB and the matching mat table
// transforms them with VGF2P8AFFINEQB, giving four intermediate vectors
// (Z0, Z2, Z3, Z4). The outShuf tables then assemble the results: Z1 from
// intermediates 0 and 1, Z2 from intermediates 1, 2 and 3.
//
// In:       AX = address of the input (unaligned 64-byte load).
// Out:      Z1, Z2.
// Clobbers: AX, K1, Z0-Z7.
// NOTE(review): the name suggests each input bit is widened to 44 output
// bits; that contract is defined by the generator and is not visible here —
// confirm against mkasm.go.
TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
	// Load the shuffle-index tables and the input vector.
	VMOVDQU64 expandAVX512_44_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_44_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_44_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_44_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_44_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_44_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_44_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	// Gather input bytes, then apply the per-lane bit matrix, once per
	// inShuf/mat pair. Each index register is overwritten by its result.
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_44_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_44_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_44_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_44_mat3<>(SB), Z4, Z4
	// Z1 = bytes picked from Z0 (indices 0x00-0x3f) and Z2 (0x40-0x7f).
	VPERMI2B Z2, Z0, Z1
	// AX is reused as scratch for the lane masks; the input pointer is lost.
	MOVQ $0xce79fe003fffffff, AX
	KMOVQ AX, K1
	// Z5 = bytes picked from Z2/Z3 in the lanes enabled by K1, zero elsewhere.
	VPERMI2B.Z Z3, Z2, K1, Z5
	// Complementary mask: exactly the lanes left zero above.
	MOVQ $0x318601ffc0000000, AX
	KMOVQ AX, K1
	// Z0 = bytes picked from Z4 in those remaining lanes, zero elsewhere.
	VPERMB.Z Z4, Z6, K1, Z0
	// Merge the two disjoint parts into the second result.
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_48_inShuf0 is a 64-entry byte-index table. expandAVX512_48
// loads it into Z0 and uses it as the VPERMB index vector over the input bytes
// read from (AX); the gathered bytes are then transformed with
// expandAVX512_48_mat0.
GLOBL expandAVX512_48_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
DATA  expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
DATA  expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
DATA  expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
DATA  expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
DATA  expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
DATA  expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
DATA  expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000

// expandAVX512_48_mat0 is the memory operand of the VGF2P8AFFINEQB (imm8 $0)
// that expandAVX512_48 applies to the bytes gathered by
// expandAVX512_48_inShuf0. It holds one 8-byte matrix per 64-bit lane.
GLOBL expandAVX512_48_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_48_inShuf1 is the VPERMB index table that expandAVX512_48 loads
// into Z3. The bytes it gathers from the input are transformed with
// expandAVX512_48_mat1.
GLOBL expandAVX512_48_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
DATA  expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
DATA  expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
DATA  expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
DATA  expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
DATA  expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
DATA  expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
DATA  expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101

// expandAVX512_48_mat1 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_48_inShuf1; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_48_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
DATA  expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
DATA  expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
DATA  expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040

// expandAVX512_48_inShuf2 is the VPERMB index table that expandAVX512_48 loads
// into Z4. The gathered bytes are transformed with expandAVX512_48_mat2, whose
// lanes 5-7 are zero matrices, so those lanes always yield zero bytes.
GLOBL expandAVX512_48_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
DATA  expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
DATA  expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
DATA  expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
DATA  expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
DATA  expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_48_mat2 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_48_inShuf2. Lanes 0-4 carry matrices;
// the zero matrices in lanes 5-7 produce zero bytes.
GLOBL expandAVX512_48_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
DATA  expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
DATA  expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_48_outShufLo is the VPERMI2B index table that expandAVX512_48
// loads into Z1. Indices 0x00-0x3f pick bytes from Z0 (the inShuf0/mat0
// result) and 0x40-0x7f pick bytes from Z3 (the inShuf1/mat1 result); the
// selected bytes overwrite Z1.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_48_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
DATA  expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
DATA  expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
DATA  expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
DATA  expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
DATA  expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
DATA  expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
DATA  expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948

// expandAVX512_48_outShufHi is the VPERMI2B index table that expandAVX512_48
// loads into Z2. Indices 0x00-0x3f pick bytes from Z3 (the inShuf1/mat1
// result) and 0x40-0x7f pick bytes from Z4 (the inShuf2/mat2 result); the
// selected bytes overwrite Z2. No lane mask is used in this routine.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_48_outShufHi(SB), RODATA, $0x40
DATA  expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
DATA  expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
DATA  expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
DATA  expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
DATA  expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
DATA  expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
DATA  expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
DATA  expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e

// expandAVX512_48 reads 64 bytes from (AX) and leaves two 64-byte results in
// Z1 and Z2.
//
// Three inShuf/mat pairs produce intermediate vectors in Z0, Z3 and Z4
// (VPERMB gather followed by VGF2P8AFFINEQB). Z1 is then assembled from Z0
// and Z3, and Z2 from Z3 and Z4, each with a single unmasked VPERMI2B.
//
// In:       AX = address of the input (unaligned 64-byte load).
// Out:      Z1, Z2.
// Clobbers: Z0-Z5. AX and the mask registers are not written.
// NOTE(review): the name suggests each input bit is widened to 48 output
// bits; that contract is defined by the generator and is not visible here —
// confirm against mkasm.go.
TEXT expandAVX512_48<>(SB), NOSPLIT, $0-0
	// Load the shuffle-index tables and the input vector.
	VMOVDQU64 expandAVX512_48_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_48_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_48_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_48_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_48_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z5
	// Gather input bytes, then apply the per-lane bit matrix, once per
	// inShuf/mat pair. Each index register is overwritten by its result.
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_48_mat0<>(SB), Z0, Z0
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_48_mat1<>(SB), Z3, Z3
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_48_mat2<>(SB), Z4, Z4
	// Z1 = bytes picked from Z0 (indices 0x00-0x3f) and Z3 (0x40-0x7f).
	VPERMI2B Z3, Z0, Z1
	// Z2 = bytes picked from Z3 (indices 0x00-0x3f) and Z4 (0x40-0x7f).
	VPERMI2B Z4, Z3, Z2
	RET

// expandAVX512_52_inShuf0 is a 64-entry byte-index table. expandAVX512_52
// loads it into Z0 and uses it as the VPERMB index vector over the input bytes
// read from (AX); the gathered bytes are then transformed with
// expandAVX512_52_mat0.
GLOBL expandAVX512_52_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
DATA  expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
DATA  expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
DATA  expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
DATA  expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
DATA  expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
DATA  expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00

// expandAVX512_52_mat0 is the memory operand of the VGF2P8AFFINEQB (imm8 $0)
// that expandAVX512_52 applies to the bytes gathered by
// expandAVX512_52_inShuf0. It holds one 8-byte matrix per 64-bit lane.
GLOBL expandAVX512_52_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020

// expandAVX512_52_inShuf1 is the VPERMB index table that expandAVX512_52 loads
// into Z2. The bytes it gathers from the input are transformed with
// expandAVX512_52_mat1.
GLOBL expandAVX512_52_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
DATA  expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
DATA  expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
DATA  expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
DATA  expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
DATA  expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
DATA  expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101

// expandAVX512_52_mat1 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_52_inShuf1; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_52_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
DATA  expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
DATA  expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404

// expandAVX512_52_inShuf2 is the VPERMB index table that expandAVX512_52 loads
// into Z3. The bytes it gathers from the input are transformed with
// expandAVX512_52_mat2.
GLOBL expandAVX512_52_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
DATA  expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
DATA  expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
DATA  expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
DATA  expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
DATA  expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
DATA  expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
DATA  expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101

// expandAVX512_52_mat2 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_52_inShuf2; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_52_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
DATA  expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
DATA  expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
DATA  expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
DATA  expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
DATA  expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
DATA  expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
DATA  expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_52_inShuf3 is the VPERMB index table that expandAVX512_52 loads
// into Z4. The gathered bytes are transformed with expandAVX512_52_mat3, whose
// lanes 4-7 are zero matrices, so those lanes always yield zero bytes.
GLOBL expandAVX512_52_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
DATA  expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
DATA  expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_52_mat3 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_52_inShuf3. Lanes 0-3 carry matrices;
// the zero matrices in lanes 4-7 produce zero bytes.
GLOBL expandAVX512_52_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_52_outShufLo is the VPERMI2B index table that expandAVX512_52
// loads into Z1. Indices 0x00-0x3f pick bytes from Z0 (the inShuf0/mat0
// result) and 0x40-0x7f pick bytes from Z2 (the inShuf1/mat1 result); the
// selected bytes overwrite Z1.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_52_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
DATA  expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
DATA  expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
DATA  expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
DATA  expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
DATA  expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
DATA  expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
DATA  expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362

// expandAVX512_52_outShufHi0 is the index table for the zero-masked VPERMI2B
// in expandAVX512_52 (loaded into Z5). Indices 0x00-0x3f pick from Z2 and
// 0x40-0x7f from Z3. The lanes cleared in the mask 0x387f80ffffffffff hold
// 0xff here and are filled from expandAVX512_52_outShufHi1 instead.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_52_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
DATA  expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
DATA  expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
DATA  expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
DATA  expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
DATA  expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff
DATA  expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
DATA  expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff

// expandAVX512_52_outShufHi1 is the index table for the zero-masked VPERMB
// over Z4 in expandAVX512_52 (loaded into Z6). Only the lanes enabled by the
// mask 0xc7807f0000000000 carry real indices; every other entry is 0xff and
// is zeroed by the mask.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_52_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
DATA  expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
DATA  expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211

// expandAVX512_52 reads 64 bytes from (AX) and leaves two 64-byte results in
// Z1 and Z2.
//
// Each inShuf table gathers input bytes with VPERMB and the matching mat table
// transforms them with VGF2P8AFFINEQB, giving four intermediate vectors
// (Z0, Z2, Z3, Z4). The outShuf tables then assemble the results: Z1 from
// intermediates 0 and 1, Z2 from intermediates 1, 2 and 3.
//
// In:       AX = address of the input (unaligned 64-byte load).
// Out:      Z1, Z2.
// Clobbers: AX, K1, Z0-Z7.
// NOTE(review): the name suggests each input bit is widened to 52 output
// bits; that contract is defined by the generator and is not visible here —
// confirm against mkasm.go.
TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0
	// Load the shuffle-index tables and the input vector.
	VMOVDQU64 expandAVX512_52_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_52_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_52_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_52_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_52_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_52_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_52_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	// Gather input bytes, then apply the per-lane bit matrix, once per
	// inShuf/mat pair. Each index register is overwritten by its result.
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_52_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_52_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_52_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_52_mat3<>(SB), Z4, Z4
	// Z1 = bytes picked from Z0 (indices 0x00-0x3f) and Z2 (0x40-0x7f).
	VPERMI2B Z2, Z0, Z1
	// AX is reused as scratch for the lane masks; the input pointer is lost.
	MOVQ $0x387f80ffffffffff, AX
	KMOVQ AX, K1
	// Z5 = bytes picked from Z2/Z3 in the lanes enabled by K1, zero elsewhere.
	VPERMI2B.Z Z3, Z2, K1, Z5
	// Complementary mask: exactly the lanes left zero above.
	MOVQ $0xc7807f0000000000, AX
	KMOVQ AX, K1
	// Z0 = bytes picked from Z4 in those remaining lanes, zero elsewhere.
	VPERMB.Z Z4, Z6, K1, Z0
	// Merge the two disjoint parts into the second result.
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_56_inShuf0 is a 64-entry byte-index table. expandAVX512_56
// loads it into Z0 and uses it as the VPERMB index vector over the input bytes
// read from (AX); the gathered bytes are then transformed with
// expandAVX512_56_mat0.
GLOBL expandAVX512_56_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
DATA  expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
DATA  expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000
DATA  expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
DATA  expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
DATA  expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
DATA  expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
DATA  expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000

// expandAVX512_56_mat0 holds one 8-byte matrix per 64-bit lane for
// VGF2P8AFFINEQB (imm8 $0). expandAVX512_56 loads it once into Z3 and applies
// it to both the inShuf0 and the inShuf1 gathers; the routine references no
// separate mat1 table.
GLOBL expandAVX512_56_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_56_inShuf1 is the VPERMB index table that expandAVX512_56 loads
// into Z4. The bytes it gathers from the input are transformed with the
// matrices of expandAVX512_56_mat0 (held in Z3), and the result replaces Z3.
GLOBL expandAVX512_56_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
DATA  expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
DATA  expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
DATA  expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
DATA  expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
DATA  expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
DATA  expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
DATA  expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101

// expandAVX512_56_inShuf2 is the VPERMB index table that expandAVX512_56 loads
// into Z5. The gathered bytes (written to Z4) are transformed with
// expandAVX512_56_mat2, whose lanes 3-7 are zero matrices, so those lanes
// always yield zero bytes.
GLOBL expandAVX512_56_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
DATA  expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
DATA  expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
DATA  expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_56_mat2 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_56_inShuf2. Lanes 0-2 carry matrices;
// the zero matrices in lanes 3-7 produce zero bytes.
GLOBL expandAVX512_56_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000
DATA  expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_56_outShufLo is the VPERMI2B index table that expandAVX512_56
// loads into Z1. Indices 0x00-0x3f pick bytes from Z0 (the inShuf0 result)
// and 0x40-0x7f pick bytes from Z3 (the inShuf1 result); the selected bytes
// overwrite Z1.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_56_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
DATA  expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
DATA  expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
DATA  expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
DATA  expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
DATA  expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
DATA  expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
DATA  expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007

// expandAVX512_56_outShufHi is the VPERMI2B index table that expandAVX512_56
// loads into Z2. Indices 0x00-0x3f pick bytes from Z3 (the inShuf1 result)
// and 0x40-0x7f pick bytes from Z4 (the inShuf2/mat2 result); the selected
// bytes overwrite Z2. No lane mask is used in this routine.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_56_outShufHi(SB), RODATA, $0x40
DATA  expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
DATA  expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
DATA  expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
DATA  expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
DATA  expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
DATA  expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
DATA  expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
DATA  expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f

// expandAVX512_56 reads 64 bytes from (AX) and leaves two 64-byte results in
// Z1 and Z2.
//
// Three VPERMB gathers (inShuf0, inShuf1, inShuf2) are each followed by a
// VGF2P8AFFINEQB. The first two share the matrices of expandAVX512_56_mat0,
// preloaded into Z3; the third uses expandAVX512_56_mat2 from memory.
// Z1 is then assembled from Z0 and Z3, and Z2 from Z3 and Z4.
//
// In:       AX = address of the input (unaligned 64-byte load).
// Out:      Z1, Z2.
// Clobbers: Z0-Z6. AX and the mask registers are not written.
// NOTE(review): the name suggests each input bit is widened to 56 output
// bits; that contract is defined by the generator and is not visible here —
// confirm against mkasm.go.
TEXT expandAVX512_56<>(SB), NOSPLIT, $0-0
	// Load the index tables, the shared matrix and the input vector.
	VMOVDQU64 expandAVX512_56_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_56_mat0<>(SB), Z3
	VMOVDQU64 expandAVX512_56_inShuf1<>(SB), Z4
	VMOVDQU64 expandAVX512_56_inShuf2<>(SB), Z5
	VMOVDQU64 expandAVX512_56_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_56_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z6
	// Intermediate 0: gather with inShuf0, transform with the matrix in Z3.
	VPERMB Z6, Z0, Z0
	VGF2P8AFFINEQB $0, Z3, Z0, Z0
	// Intermediate 1: gather with inShuf1, transform with the same matrix.
	// The result overwrites Z3, so the shared matrix is gone after this point.
	VPERMB Z6, Z4, Z4
	VGF2P8AFFINEQB $0, Z3, Z4, Z3
	// Intermediate 2: gather with inShuf2 into the now-free Z4, transform
	// with mat2.
	VPERMB Z6, Z5, Z4
	VGF2P8AFFINEQB $0, expandAVX512_56_mat2<>(SB), Z4, Z4
	// Z1 = bytes picked from Z0 (indices 0x00-0x3f) and Z3 (0x40-0x7f).
	VPERMI2B Z3, Z0, Z1
	// Z2 = bytes picked from Z3 (indices 0x00-0x3f) and Z4 (0x40-0x7f).
	VPERMI2B Z4, Z3, Z2
	RET

// expandAVX512_60_inShuf0 is a 64-entry byte-index table. expandAVX512_60
// loads it into Z0 and uses it as the VPERMB index vector over the input bytes
// read from (AX); the gathered bytes are then transformed with
// expandAVX512_60_mat0.
GLOBL expandAVX512_60_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
DATA  expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00

// expandAVX512_60_mat0 is the memory operand of the VGF2P8AFFINEQB (imm8 $0)
// that expandAVX512_60 applies to the bytes gathered by
// expandAVX512_60_inShuf0. It holds one 8-byte matrix per 64-bit lane.
GLOBL expandAVX512_60_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA  expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA  expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA  expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA  expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA  expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA  expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020

// expandAVX512_60_inShuf1 is the VPERMB index table that expandAVX512_60 loads
// into Z2. The bytes it gathers from the input are transformed with
// expandAVX512_60_mat1.
GLOBL expandAVX512_60_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
DATA  expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
DATA  expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
DATA  expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
DATA  expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
DATA  expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101

// expandAVX512_60_mat1 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_60_inShuf1; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_60_mat1<>(SB), RODATA, $0x40
DATA  expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA  expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA  expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA  expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA  expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
DATA  expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
DATA  expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202

// expandAVX512_60_inShuf2 is the VPERMB index table that expandAVX512_60 loads
// into Z3. The bytes it gathers from the input are transformed with
// expandAVX512_60_mat2.
GLOBL expandAVX512_60_inShuf2<>(SB), RODATA, $0x40
DATA  expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
DATA  expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
DATA  expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
DATA  expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
DATA  expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
DATA  expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
DATA  expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
DATA  expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01

// expandAVX512_60_mat2 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_60_inShuf2; one 8-byte matrix per
// 64-bit lane.
GLOBL expandAVX512_60_mat2<>(SB), RODATA, $0x40
DATA  expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
DATA  expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
DATA  expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
DATA  expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
DATA  expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
DATA  expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080

// expandAVX512_60_inShuf3 is the VPERMB index table that expandAVX512_60 loads
// into Z4. The gathered bytes are transformed with expandAVX512_60_mat3, whose
// lanes 2-7 are zero matrices, so only the first two 64-bit lanes can yield
// non-zero bytes.
GLOBL expandAVX512_60_inShuf3<>(SB), RODATA, $0x40
DATA  expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
DATA  expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
DATA  expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff

// expandAVX512_60_mat3 is the VGF2P8AFFINEQB (imm8 $0) matrix operand applied
// to the bytes gathered by expandAVX512_60_inShuf3. Lanes 0-1 carry matrices;
// the zero matrices in lanes 2-7 produce zero bytes.
GLOBL expandAVX512_60_mat3<>(SB), RODATA, $0x40
DATA  expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
DATA  expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
DATA  expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000
DATA  expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
DATA  expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_60_outShufLo is the VPERMI2B index table that expandAVX512_60
// loads into Z1. Indices 0x00-0x3f pick bytes from Z0 (the inShuf0/mat0
// result) and 0x40-0x7f pick bytes from Z2 (the inShuf1/mat1 result); the
// selected bytes overwrite Z1.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_60_outShufLo(SB), RODATA, $0x40
DATA  expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
DATA  expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
DATA  expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
DATA  expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
DATA  expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
DATA  expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
DATA  expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
DATA  expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b

// expandAVX512_60_outShufHi0 is the index table for the zero-masked VPERMI2B
// in expandAVX512_60 (loaded into Z5). Indices 0x00-0x3f pick from Z2 and
// 0x40-0x7f from Z3. The lanes cleared in the mask 0x9f01ffffffffffff hold
// 0xff here and are filled from expandAVX512_60_outShufHi1 instead.
// NOTE(review): declared without the <> file-local suffix that the inShuf and
// mat tables carry — confirm this is intended.
GLOBL expandAVX512_60_outShufHi0(SB), RODATA, $0x40
DATA  expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
DATA  expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
DATA  expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
DATA  expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
DATA  expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
DATA  expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
DATA  expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78
DATA  expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b

GLOBL expandAVX512_60_outShufHi1(SB), RODATA, $0x40
DATA  expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
DATA  expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
DATA  expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff

TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_60_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_60_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_60_inShuf2<>(SB), Z3
	VMOVDQU64 expandAVX512_60_inShuf3<>(SB), Z4
	VMOVDQU64 expandAVX512_60_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_60_outShufHi0(SB), Z5
	VMOVDQU64 expandAVX512_60_outShufHi1(SB), Z6
	VMOVDQU64 (AX), Z7
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_60_mat0<>(SB), Z0, Z0
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_60_mat1<>(SB), Z2, Z2
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_60_mat2<>(SB), Z3, Z3
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_60_mat3<>(SB), Z4, Z4
	VPERMI2B Z2, Z0, Z1
	MOVQ $0x9f01ffffffffffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5
	MOVQ $0x60fe000000000000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0
	VPORQ Z0, Z5, Z2
	RET

// expandAVX512_64_inShuf0 is the VPERMB index vector for the first stage of
// expandAVX512_64. Every index is 0, so the permute copies input byte 0 into
// all 64 byte lanes.
GLOBL expandAVX512_64_inShuf0<>(SB), RODATA, $0x40
DATA  expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
DATA  expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000

// expandAVX512_64_mat0 is the matrix operand shared by both VGF2P8AFFINEQB
// instructions in expandAVX512_64. In 64-bit lane k every matrix row is the
// single-bit mask 1<<k, so each output byte of lane k is 0xff when bit k of
// the corresponding data byte is set and 0x00 otherwise. Bit k of the data
// byte is therefore replicated across all 64 bits of lane k.
GLOBL expandAVX512_64_mat0<>(SB), RODATA, $0x40
DATA  expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA  expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA  expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA  expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA  expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA  expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA  expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080

// expandAVX512_64_inShuf1 is the VPERMB index vector for the second stage of
// expandAVX512_64. Every index is 1, so the permute copies input byte 1 into
// all 64 byte lanes.
GLOBL expandAVX512_64_inShuf1<>(SB), RODATA, $0x40
DATA  expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
DATA  expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101

// expandAVX512_64_outShufLo is the output byte-shuffle for expandAVX512_64.
// It is the identity permutation (index i at byte i), so the final VPERMBs
// copy their source unchanged.
//
// The table is private to expandAVX512_64, so it carries the <> suffix
// (file-local symbol) like the other tables in this file.
// NOTE(review): this file is generated by mkasm.go; the generator should emit
// the same suffix so the change survives regeneration.
GLOBL expandAVX512_64_outShufLo<>(SB), RODATA, $0x40
DATA  expandAVX512_64_outShufLo<>+0x00(SB)/8, $0x0706050403020100
DATA  expandAVX512_64_outShufLo<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
DATA  expandAVX512_64_outShufLo<>+0x10(SB)/8, $0x1716151413121110
DATA  expandAVX512_64_outShufLo<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
DATA  expandAVX512_64_outShufLo<>+0x20(SB)/8, $0x2726252423222120
DATA  expandAVX512_64_outShufLo<>+0x28(SB)/8, $0x2f2e2d2c2b2a2928
DATA  expandAVX512_64_outShufLo<>+0x30(SB)/8, $0x3736353433323130
DATA  expandAVX512_64_outShufLo<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938

// expandAVX512_64 loads 64 bytes from the address in AX and replicates each
// of the first 16 input bits 64 times: bit k of input byte 0 fills 64-bit
// lane k of Z1, and bit k of input byte 1 fills 64-bit lane k of Z2.
//
// In:       AX = address of 64 readable bytes (only bytes 0 and 1 affect the
//           result).
// Out:      Z1 = expansion of input byte 0, Z2 = expansion of input byte 1.
//           NOTE(review): the caller is not visible from here; confirm that
//           Z1/Z2 are the registers it consumes.
// Clobbers: Z0-Z4.
// Frame:    $0-0, NOSPLIT (no stack use, no stack-based arguments).
TEXT expandAVX512_64<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_64_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_64_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_64_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_64_outShufLo<>(SB), Z3
	VMOVDQU64 (AX), Z4
	// Broadcast input byte 0 to every lane, then turn bit k into a full
	// 64-bit lane k of ones or zeros.
	VPERMB Z4, Z0, Z0
	VGF2P8AFFINEQB $0, Z1, Z0, Z0
	// Same for input byte 1.
	VPERMB Z4, Z2, Z2
	VGF2P8AFFINEQB $0, Z1, Z2, Z2
	// Output shuffle; this overwrites the matrix in Z1, which is no longer
	// needed.
	VPERMB Z0, Z3, Z1
	VPERMB Z2, Z3, Z2
	RET