// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.

//go:build goexperiment.simd

package archsimd

/* AESDecryptLastRound */

// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// Asm: VAESDECLAST, CPU Feature: AVX, AES
func ( Uint8x16) ( Uint32x4) Uint8x16

// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// Asm: VAESDECLAST, CPU Feature: AVX512VAES
func ( Uint8x32) ( Uint32x8) Uint8x32

// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// Asm: VAESDECLAST, CPU Feature: AVX512VAES
func ( Uint8x64) ( Uint32x16) Uint8x64

/* AESDecryptOneRound */

// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// Asm: VAESDEC, CPU Feature: AVX, AES
func ( Uint8x16) ( Uint32x4) Uint8x16

// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// Asm: VAESDEC, CPU Feature: AVX512VAES
func ( Uint8x32) ( Uint32x8) Uint8x32

// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// Asm: VAESDEC, CPU Feature: AVX512VAES
func ( Uint8x64) ( Uint32x16) Uint8x64

/* AESEncryptLastRound */

// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of w array in use.
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENCLAST, CPU Feature: AVX, AES
func ( Uint8x16) ( Uint32x4) Uint8x16

// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of w array in use.
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENCLAST, CPU Feature: AVX512VAES
func ( Uint8x32) ( Uint32x8) Uint8x32

// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of w array in use.
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENCLAST, CPU Feature: AVX512VAES
func ( Uint8x64) ( Uint32x16) Uint8x64

/* AESEncryptOneRound */

// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENC, CPU Feature: AVX, AES
func ( Uint8x16) ( Uint32x4) Uint8x16

// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENC, CPU Feature: AVX512VAES
func ( Uint8x32) ( Uint32x8) Uint8x32

// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENC, CPU Feature: AVX512VAES
func ( Uint8x64) ( Uint32x16) Uint8x64

/* AESInvMixColumns */

// AESInvMixColumns performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
// x is the chunk of w array in use.
// result = InvMixColumns(x)
//
// Asm: VAESIMC, CPU Feature: AVX, AES
func ( Uint32x4) () Uint32x4

/* AESRoundKeyGenAssist */

// AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
// x is an array of AES words, but only x[0] and x[2] are used.
// r is a value from the Rcon constant array.
// result[0] = XOR(SubWord(RotWord(x[0])), r)
// result[1] = SubWord(x[1])
// result[2] = XOR(SubWord(RotWord(x[2])), r)
// result[3] = SubWord(x[3])
//
// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
func ( Uint32x4) ( uint8) Uint32x4

/* Abs */

// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX
func ( Int8x16) () Int8x16

// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX2
func ( Int8x32) () Int8x32

// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512
func ( Int8x64) () Int8x64

// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX
func ( Int16x8) () Int16x8

// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX2
func ( Int16x16) () Int16x16

// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512
func ( Int16x32) () Int16x32

// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX
func ( Int32x4) () Int32x4

// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX2
func ( Int32x8) () Int32x8

// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512
func ( Int32x16) () Int32x16

// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512
func ( Int64x2) () Int64x2

// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512
func ( Int64x4) () Int64x4

// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512
func ( Int64x8) () Int64x8

/* Add */

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* AddPairs */

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VHADDPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VHADDPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VHADDPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VHADDPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

/* AddPairsSaturated */

// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

/* AddSaturated */

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

/* AddSub */

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

/* And */

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* AndNot */

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDNQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDNQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* Average */

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

/* Broadcast128 */

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func ( Float32x4) () Float32x4

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func ( Float64x2) () Float64x2

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func ( Int8x16) () Int8x16

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func ( Int16x8) () Int16x8

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func ( Int32x4) () Int32x4

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func ( Int64x2) () Int64x2

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func ( Uint8x16) () Uint8x16

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func ( Uint16x8) () Uint16x8

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func ( Uint32x4) () Uint32x4

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func ( Uint64x2) () Uint64x2

/* Broadcast256 */

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func ( Float32x4) () Float32x8

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VBROADCASTSD, CPU Feature: AVX2
func ( Float64x2) () Float64x4

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func ( Int8x16) () Int8x32

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func ( Int16x8) () Int16x16

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func ( Int32x4) () Int32x8

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func ( Int64x2) () Int64x4

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func ( Uint8x16) () Uint8x32

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func ( Uint16x8) () Uint16x16

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func ( Uint32x4) () Uint32x8

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func ( Uint64x2) () Uint64x4

/* Broadcast512 */

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func ( Float32x4) () Float32x16

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VBROADCASTSD, CPU Feature: AVX512
func ( Float64x2) () Float64x8

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func ( Int8x16) () Int8x64

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func ( Int16x8) () Int16x32

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func ( Int32x4) () Int32x16

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func ( Int64x2) () Int64x8

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func ( Uint8x16) () Uint8x64

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func ( Uint16x8) () Uint16x32

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func ( Uint32x4) () Uint32x16

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func ( Uint64x2) () Uint64x8

/* Ceil */

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x2) () Float64x2

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x4) () Float64x4

/* CeilScaled */

// CeilScaled rounds elements up with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// CeilScaled rounds elements up with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// CeilScaled rounds elements up with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// CeilScaled rounds elements up with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// CeilScaled rounds elements up with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// CeilScaled rounds elements up with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* CeilScaledResidue */

// CeilScaledResidue computes the difference after ceiling with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// CeilScaledResidue computes the difference after ceiling with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// CeilScaledResidue computes the difference after ceiling with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// CeilScaledResidue computes the difference after ceiling with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// CeilScaledResidue computes the difference after ceiling with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// CeilScaledResidue computes the difference after ceiling with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* Compress */

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512
func ( Float32x4) ( Mask32x4) Float32x4

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512
func ( Float32x8) ( Mask32x8) Float32x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512
func ( Float32x16) ( Mask32x16) Float32x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512
func ( Float64x2) ( Mask64x2) Float64x2

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512
func ( Float64x4) ( Mask64x4) Float64x4

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512
func ( Float64x8) ( Mask64x8) Float64x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func ( Int8x16) ( Mask8x16) Int8x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func ( Int8x32) ( Mask8x32) Int8x32

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func ( Int8x64) ( Mask8x64) Int8x64

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func ( Int16x8) ( Mask16x8) Int16x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func ( Int16x16) ( Mask16x16) Int16x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func ( Int16x32) ( Mask16x32) Int16x32

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func ( Int32x4) ( Mask32x4) Int32x4

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func ( Int32x8) ( Mask32x8) Int32x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func ( Int32x16) ( Mask32x16) Int32x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func ( Int64x2) ( Mask64x2) Int64x2

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func ( Int64x4) ( Mask64x4) Int64x4

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func ( Int64x8) ( Mask64x8) Int64x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func ( Uint8x16) ( Mask8x16) Uint8x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func ( Uint8x32) ( Mask8x32) Uint8x32

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func ( Uint8x64) ( Mask8x64) Uint8x64

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func ( Uint16x8) ( Mask16x8) Uint16x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func ( Uint16x16) ( Mask16x16) Uint16x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func ( Uint16x32) ( Mask16x32) Uint16x32

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func ( Uint32x4) ( Mask32x4) Uint32x4

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func ( Uint32x8) ( Mask32x8) Uint32x8

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func ( Uint32x16) ( Mask32x16) Uint32x16

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func ( Uint64x2) ( Mask64x2) Uint64x2

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func ( Uint64x4) ( Mask64x4) Uint64x4

// Compress performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func ( Uint64x8) ( Mask64x8) Uint64x8

/* ConcatPermute */

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func ( Int8x16) ( Int8x16,  Uint8x16) Int8x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func ( Uint8x16) ( Uint8x16,  Uint8x16) Uint8x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func ( Int8x32) ( Int8x32,  Uint8x32) Int8x32

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func ( Uint8x32) ( Uint8x32,  Uint8x32) Uint8x32

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func ( Int8x64) ( Int8x64,  Uint8x64) Int8x64

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func ( Uint8x64) ( Uint8x64,  Uint8x64) Uint8x64

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func ( Int16x8) ( Int16x8,  Uint16x8) Int16x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func ( Uint16x8) ( Uint16x8,  Uint16x8) Uint16x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func ( Int16x16) ( Int16x16,  Uint16x16) Int16x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func ( Uint16x16) ( Uint16x16,  Uint16x16) Uint16x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func ( Int16x32) ( Int16x32,  Uint16x32) Int16x32

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32,  Uint16x32) Uint16x32

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PS, CPU Feature: AVX512
func ( Float32x4) ( Float32x4,  Uint32x4) Float32x4

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func ( Int32x4) ( Int32x4,  Uint32x4) Int32x4

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func ( Uint32x4) ( Uint32x4,  Uint32x4) Uint32x4

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PS, CPU Feature: AVX512
func ( Float32x8) ( Float32x8,  Uint32x8) Float32x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func ( Int32x8) ( Int32x8,  Uint32x8) Int32x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func ( Uint32x8) ( Uint32x8,  Uint32x8) Uint32x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16,  Uint32x16) Float32x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func ( Int32x16) ( Int32x16,  Uint32x16) Int32x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16,  Uint32x16) Uint32x16

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PD, CPU Feature: AVX512
func ( Float64x2) ( Float64x2,  Uint64x2) Float64x2

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func ( Int64x2) ( Int64x2,  Uint64x2) Int64x2

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func ( Uint64x2) ( Uint64x2,  Uint64x2) Uint64x2

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PD, CPU Feature: AVX512
func ( Float64x4) ( Float64x4,  Uint64x4) Float64x4

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func ( Int64x4) ( Int64x4,  Uint64x4) Int64x4

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4,  Uint64x4) Uint64x4

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8,  Uint64x8) Float64x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func ( Int64x8) ( Int64x8,  Uint64x8) Int64x8

// ConcatPermute performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8,  Uint64x8) Uint64x8

/* ConcatShiftBytesRight */

// ConcatShiftBytesRight concatenates x and y and shift it right by constant bytes.
// The result vector will be the lower half of the concatenated vector.
//
// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX
func ( Uint8x16) ( uint8,  Uint8x16) Uint8x16

/* ConcatShiftBytesRightGrouped */

// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by constant bytes.
// The result vector will be the lower half of the concatenated vector.
// This operation is performed grouped by each 16 byte.
//
// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX2
func ( Uint8x32) ( uint8,  Uint8x32) Uint8x32

// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by constant bytes.
// The result vector will be the lower half of the concatenated vector.
// This operation is performed grouped by each 16 byte.
//
// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX512
func ( Uint8x64) ( uint8,  Uint8x64) Uint8x64

/* ConvertToFloat32 */

// ConvertToFloat32 converts element values to float32.
// The result vector's elements are rounded to the nearest value.
//
// Asm: VCVTPD2PSX, CPU Feature: AVX
func ( Float64x2) () Float32x4

// ConvertToFloat32 converts element values to float32.
// The result vector's elements are rounded to the nearest value.
//
// Asm: VCVTPD2PSY, CPU Feature: AVX
func ( Float64x4) () Float32x4

// ConvertToFloat32 converts element values to float32.
// The result vector's elements are rounded to the nearest value.
//
// Asm: VCVTPD2PS, CPU Feature: AVX512
func ( Float64x8) () Float32x8

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTDQ2PS, CPU Feature: AVX
func ( Int32x4) () Float32x4

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTDQ2PS, CPU Feature: AVX
func ( Int32x8) () Float32x8

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTDQ2PS, CPU Feature: AVX512
func ( Int32x16) () Float32x16

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTQQ2PSX, CPU Feature: AVX512
func ( Int64x2) () Float32x4

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTQQ2PSY, CPU Feature: AVX512
func ( Int64x4) () Float32x4

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTQQ2PS, CPU Feature: AVX512
func ( Int64x8) () Float32x8

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTUDQ2PS, CPU Feature: AVX512
func ( Uint32x4) () Float32x4

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTUDQ2PS, CPU Feature: AVX512
func ( Uint32x8) () Float32x8

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTUDQ2PS, CPU Feature: AVX512
func ( Uint32x16) () Float32x16

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTUQQ2PSX, CPU Feature: AVX512
func ( Uint64x2) () Float32x4

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTUQQ2PSY, CPU Feature: AVX512
func ( Uint64x4) () Float32x4

// ConvertToFloat32 converts element values to float32.
//
// Asm: VCVTUQQ2PS, CPU Feature: AVX512
func ( Uint64x8) () Float32x8

/* ConvertToFloat64 */

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTPS2PD, CPU Feature: AVX
func ( Float32x4) () Float64x4

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTPS2PD, CPU Feature: AVX512
func ( Float32x8) () Float64x8

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTDQ2PD, CPU Feature: AVX
func ( Int32x4) () Float64x4

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTDQ2PD, CPU Feature: AVX512
func ( Int32x8) () Float64x8

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTQQ2PD, CPU Feature: AVX512
func ( Int64x2) () Float64x2

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTQQ2PD, CPU Feature: AVX512
func ( Int64x4) () Float64x4

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTQQ2PD, CPU Feature: AVX512
func ( Int64x8) () Float64x8

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTUDQ2PD, CPU Feature: AVX512
func ( Uint32x4) () Float64x4

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTUDQ2PD, CPU Feature: AVX512
func ( Uint32x8) () Float64x8

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTUQQ2PD, CPU Feature: AVX512
func ( Uint64x2) () Float64x2

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTUQQ2PD, CPU Feature: AVX512
func ( Uint64x4) () Float64x4

// ConvertToFloat64 converts element values to float64.
//
// Asm: VCVTUQQ2PD, CPU Feature: AVX512
func ( Uint64x8) () Float64x8

/* ConvertToInt32 */

// ConvertToInt32 converts element values to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX
func ( Float32x4) () Int32x4

// ConvertToInt32 converts element values to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX
func ( Float32x8) () Int32x8

// ConvertToInt32 converts element values to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX512
func ( Float32x16) () Int32x16

// ConvertToInt32 converts element values to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2DQX, CPU Feature: AVX
func ( Float64x2) () Int32x4

// ConvertToInt32 converts element values to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2DQY, CPU Feature: AVX
func ( Float64x4) () Int32x4

// ConvertToInt32 converts element values to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2DQ, CPU Feature: AVX512
func ( Float64x8) () Int32x8

/* ConvertToInt64 */

// ConvertToInt64 converts element values to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2QQ, CPU Feature: AVX512
func ( Float32x4) () Int64x4

// ConvertToInt64 converts element values to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2QQ, CPU Feature: AVX512
func ( Float32x8) () Int64x8

// ConvertToInt64 converts element values to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2QQ, CPU Feature: AVX512
func ( Float64x2) () Int64x2

// ConvertToInt64 converts element values to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2QQ, CPU Feature: AVX512
func ( Float64x4) () Int64x4

// ConvertToInt64 converts element values to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2QQ, CPU Feature: AVX512
func ( Float64x8) () Int64x8

/* ConvertToUint32 */

// ConvertToUint32 converts element values to uint32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2UDQ, CPU Feature: AVX512
func ( Float32x4) () Uint32x4

// ConvertToUint32 converts element values to uint32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2UDQ, CPU Feature: AVX512
func ( Float32x8) () Uint32x8

// ConvertToUint32 converts element values to uint32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2UDQ, CPU Feature: AVX512
func ( Float32x16) () Uint32x16

// ConvertToUint32 converts element values to uint32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2UDQX, CPU Feature: AVX512
func ( Float64x2) () Uint32x4

// ConvertToUint32 converts element values to uint32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2UDQY, CPU Feature: AVX512
func ( Float64x4) () Uint32x4

// ConvertToUint32 converts element values to uint32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2UDQ, CPU Feature: AVX512
func ( Float64x8) () Uint32x8

/* ConvertToUint64 */

// ConvertToUint64 converts element values to uint64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2UQQ, CPU Feature: AVX512
func ( Float32x4) () Uint64x4

// ConvertToUint64 converts element values to uint64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2UQQ, CPU Feature: AVX512
func ( Float32x8) () Uint64x8

// ConvertToUint64 converts element values to uint64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2UQQ, CPU Feature: AVX512
func ( Float64x2) () Uint64x2

// ConvertToUint64 converts element values to uint64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2UQQ, CPU Feature: AVX512
func ( Float64x4) () Uint64x4

// ConvertToUint64 converts element values to uint64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in uint64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2UQQ, CPU Feature: AVX512
func ( Float64x8) () Uint64x8

/* CopySign */

// CopySign returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
//
// Asm: VPSIGNB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// CopySign returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
//
// Asm: VPSIGNB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// CopySign returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
//
// Asm: VPSIGNW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// CopySign returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
//
// Asm: VPSIGNW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// CopySign returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
//
// Asm: VPSIGND, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// CopySign returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
//
// Asm: VPSIGND, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

/* Div */

// Div divides elements of two vectors.
//
// Asm: VDIVPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// Div divides elements of two vectors.
//
// Asm: VDIVPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// Div divides elements of two vectors.
//
// Asm: VDIVPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Div divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// Div divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// Div divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

/* DotProductPairs */

// DotProductPairs multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDWD, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int32x4

// DotProductPairs multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDWD, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int32x8

// DotProductPairs multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDWD, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int32x16

/* DotProductPairsSaturated */

// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX
func ( Uint8x16) ( Int8x16) Int16x8

// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX2
func ( Uint8x32) ( Int8x32) Int16x16

// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512
func ( Uint8x64) ( Int8x64) Int16x32

/* DotProductQuadruple */

// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
//
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
func ( Int8x16) ( Uint8x16) Int32x4

// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
//
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
func ( Int8x32) ( Uint8x32) Int32x8

// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func ( Int8x64) ( Uint8x64) Int32x16

/* DotProductQuadrupleSaturated */

// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func ( Int8x16) ( Uint8x16) Int32x4

// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func ( Int8x32) ( Uint8x32) Int32x8

// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func ( Int8x64) ( Uint8x64) Int32x16

/* Equal */

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Mask8x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Mask8x32

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Mask8x64

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Mask16x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Mask16x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Mask16x32

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Mask32x4

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Mask32x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Mask32x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Mask64x2

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Mask64x4

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Mask64x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Mask8x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Mask8x32

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Mask8x64

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Mask16x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Mask16x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Mask16x32

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Mask32x4

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Mask32x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Mask32x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Mask64x2

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Mask64x4

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Mask64x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// Equal returns a mask indicating, elementwise, whether x equals y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

/* Expand */

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPS, CPU Feature: AVX512
func ( Float32x4) ( Mask32x4) Float32x4

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPS, CPU Feature: AVX512
func ( Float32x8) ( Mask32x8) Float32x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPS, CPU Feature: AVX512
func ( Float32x16) ( Mask32x16) Float32x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPD, CPU Feature: AVX512
func ( Float64x2) ( Mask64x2) Float64x2

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPD, CPU Feature: AVX512
func ( Float64x4) ( Mask64x4) Float64x4

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPD, CPU Feature: AVX512
func ( Float64x8) ( Mask64x8) Float64x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func ( Int8x16) ( Mask8x16) Int8x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func ( Int8x32) ( Mask8x32) Int8x32

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func ( Int8x64) ( Mask8x64) Int8x64

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func ( Int16x8) ( Mask16x8) Int16x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func ( Int16x16) ( Mask16x16) Int16x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func ( Int16x32) ( Mask16x32) Int16x32

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func ( Int32x4) ( Mask32x4) Int32x4

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func ( Int32x8) ( Mask32x8) Int32x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func ( Int32x16) ( Mask32x16) Int32x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func ( Int64x2) ( Mask64x2) Int64x2

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func ( Int64x4) ( Mask64x4) Int64x4

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func ( Int64x8) ( Mask64x8) Int64x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func ( Uint8x16) ( Mask8x16) Uint8x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func ( Uint8x32) ( Mask8x32) Uint8x32

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func ( Uint8x64) ( Mask8x64) Uint8x64

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func ( Uint16x8) ( Mask16x8) Uint16x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func ( Uint16x16) ( Mask16x16) Uint16x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func ( Uint16x32) ( Mask16x32) Uint16x32

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func ( Uint32x4) ( Mask32x4) Uint32x4

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func ( Uint32x8) ( Mask32x8) Uint32x8

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func ( Uint32x16) ( Mask32x16) Uint32x16

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func ( Uint64x2) ( Mask64x2) Uint64x2

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func ( Uint64x4) ( Mask64x4) Uint64x4

// Expand performs an expansion on a vector x whose elements are packed into its lower part.
// The expansion distributes the elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func ( Uint64x8) ( Mask64x8) Uint64x8

/* ExtendLo2ToInt64x2 */

// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX
func ( Int8x16) () Int64x2

// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX
func ( Int16x8) () Int64x2

// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX
func ( Int32x4) () Int64x2

/* ExtendLo2ToUint64x2 */

// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX
func ( Uint8x16) () Uint64x2

// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX
func ( Uint16x8) () Uint64x2

// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX
func ( Uint32x4) () Uint64x2

/* ExtendLo4ToInt32x4 */

// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBD, CPU Feature: AVX
func ( Int8x16) () Int32x4

// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXWD, CPU Feature: AVX
func ( Int16x8) () Int32x4

/* ExtendLo4ToInt64x4 */

// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX2
func ( Int8x16) () Int64x4

// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX2
func ( Int16x8) () Int64x4

/* ExtendLo4ToUint32x4 */

// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBD, CPU Feature: AVX
func ( Uint8x16) () Uint32x4

// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXWD, CPU Feature: AVX
func ( Uint16x8) () Uint32x4

/* ExtendLo4ToUint64x4 */

// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX2
func ( Uint8x16) () Uint64x4

// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX2
func ( Uint16x8) () Uint64x4

/* ExtendLo8ToInt16x8 */

// ExtendLo8ToInt16x8 converts 8 lowest vector element values to int16.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBW, CPU Feature: AVX
func ( Int8x16) () Int16x8

/* ExtendLo8ToInt32x8 */

// ExtendLo8ToInt32x8 converts 8 lowest vector element values to int32.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBD, CPU Feature: AVX2
func ( Int8x16) () Int32x8

/* ExtendLo8ToInt64x8 */

// ExtendLo8ToInt64x8 converts 8 lowest vector element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX512
func ( Int8x16) () Int64x8

/* ExtendLo8ToUint16x8 */

// ExtendLo8ToUint16x8 converts 8 lowest vector element values to uint16.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBW, CPU Feature: AVX
func ( Uint8x16) () Uint16x8

/* ExtendLo8ToUint32x8 */

// ExtendLo8ToUint32x8 converts 8 lowest vector element values to uint32.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBD, CPU Feature: AVX2
func ( Uint8x16) () Uint32x8

/* ExtendLo8ToUint64x8 */

// ExtendLo8ToUint64x8 converts 8 lowest vector element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX512
func ( Uint8x16) () Uint64x8

/* ExtendToInt16 */

// ExtendToInt16 converts element values to int16.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBW, CPU Feature: AVX2
func ( Int8x16) () Int16x16

// ExtendToInt16 converts element values to int16.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBW, CPU Feature: AVX512
func ( Int8x32) () Int16x32

/* ExtendToInt32 */

// ExtendToInt32 converts element values to int32.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXBD, CPU Feature: AVX512
func ( Int8x16) () Int32x16

// ExtendToInt32 converts element values to int32.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXWD, CPU Feature: AVX2
func ( Int16x8) () Int32x8

// ExtendToInt32 converts element values to int32.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXWD, CPU Feature: AVX512
func ( Int16x16) () Int32x16

/* ExtendToInt64 */

// ExtendToInt64 converts element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX512
func ( Int16x8) () Int64x8

// ExtendToInt64 converts element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX2
func ( Int32x4) () Int64x4

// ExtendToInt64 converts element values to int64.
// The result vector's elements are sign-extended.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX512
func ( Int32x8) () Int64x8

/* ExtendToUint16 */

// ExtendToUint16 converts element values to uint16.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBW, CPU Feature: AVX2
func ( Uint8x16) () Uint16x16

// ExtendToUint16 converts element values to uint16.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBW, CPU Feature: AVX512
func ( Uint8x32) () Uint16x32

/* ExtendToUint32 */

// ExtendToUint32 converts element values to uint32.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXBD, CPU Feature: AVX512
func ( Uint8x16) () Uint32x16

// ExtendToUint32 converts element values to uint32.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXWD, CPU Feature: AVX2
func ( Uint16x8) () Uint32x8

// ExtendToUint32 converts element values to uint32.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXWD, CPU Feature: AVX512
func ( Uint16x16) () Uint32x16

/* ExtendToUint64 */

// ExtendToUint64 converts element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX512
func ( Uint16x8) () Uint64x8

// ExtendToUint64 converts element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX2
func ( Uint32x4) () Uint64x4

// ExtendToUint64 converts element values to uint64.
// The result vector's elements are zero-extended.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX512
func ( Uint32x8) () Uint64x8

/* Floor */

// Floor rounds elements down to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// Floor rounds elements down to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// Floor rounds elements down to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x2) () Float64x2

// Floor rounds elements down to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x4) () Float64x4

/* FloorScaled */

// FloorScaled rounds elements down with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// FloorScaled rounds elements down with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// FloorScaled rounds elements down with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// FloorScaled rounds elements down with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// FloorScaled rounds elements down with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// FloorScaled rounds elements down with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* FloorScaledResidue */

// FloorScaledResidue computes the difference after flooring with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// FloorScaledResidue computes the difference after flooring with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// FloorScaledResidue computes the difference after flooring with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// FloorScaledResidue computes the difference after flooring with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// FloorScaledResidue computes the difference after flooring with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// FloorScaledResidue computes the difference after flooring with the specified precision.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* GaloisFieldAffineTransform */

// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
func ( Uint8x16) ( Uint64x2,  uint8) Uint8x16

// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
func ( Uint8x32) ( Uint64x4,  uint8) Uint8x32

// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
func ( Uint8x64) ( Uint64x8,  uint8) Uint8x64

/* GaloisFieldAffineTransformInverse */

// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
func ( Uint8x16) ( Uint64x2,  uint8) Uint8x16

// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
func ( Uint8x32) ( Uint64x4,  uint8) Uint8x32

// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
func ( Uint8x64) ( Uint64x8,  uint8) Uint8x64

/* GaloisFieldMul */

// GaloisFieldMul computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
func ( Uint8x16) ( Uint8x16) Uint8x16

// GaloisFieldMul computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
func ( Uint8x32) ( Uint8x32) Uint8x32

// GaloisFieldMul computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
func ( Uint8x64) ( Uint8x64) Uint8x64

/* GetElem */

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRD, CPU Feature: AVX
func ( Float32x4) ( uint8) float32

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func ( Float64x2) ( uint8) float64

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRB, CPU Feature: AVX512
func ( Int8x16) ( uint8) int8

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRW, CPU Feature: AVX512
func ( Int16x8) ( uint8) int16

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRD, CPU Feature: AVX
func ( Int32x4) ( uint8) int32

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func ( Int64x2) ( uint8) int64

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRB, CPU Feature: AVX512
func ( Uint8x16) ( uint8) uint8

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRW, CPU Feature: AVX512
func ( Uint16x8) ( uint8) uint16

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRD, CPU Feature: AVX
func ( Uint32x4) ( uint8) uint32

// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func ( Uint64x2) ( uint8) uint64

/* GetHi */

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func ( Float32x8) () Float32x4

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func ( Float32x16) () Float32x8

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func ( Float64x4) () Float64x2

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func ( Float64x8) () Float64x4

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int8x32) () Int8x16

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int8x64) () Int8x32

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int16x16) () Int16x8

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int16x32) () Int16x16

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int32x8) () Int32x4

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int32x16) () Int32x8

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int64x4) () Int64x2

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int64x8) () Int64x4

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint8x32) () Uint8x16

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint8x64) () Uint8x32

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint16x16) () Uint16x8

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint16x32) () Uint16x16

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint32x8) () Uint32x4

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint32x16) () Uint32x8

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint64x4) () Uint64x2

// GetHi returns the upper half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint64x8) () Uint64x4

/* GetLo */

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func ( Float32x8) () Float32x4

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func ( Float32x16) () Float32x8

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func ( Float64x4) () Float64x2

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func ( Float64x8) () Float64x4

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int8x32) () Int8x16

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int8x64) () Int8x32

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int16x16) () Int16x8

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int16x32) () Int16x16

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int32x8) () Int32x4

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int32x16) () Int32x8

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Int64x4) () Int64x2

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Int64x8) () Int64x4

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint8x32) () Uint8x16

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint8x64) () Uint8x32

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint16x16) () Uint16x8

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint16x32) () Uint16x16

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint32x8) () Uint32x4

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint32x16) () Uint32x8

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func ( Uint64x4) () Uint64x2

// GetLo returns the lower half of x.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func ( Uint64x8) () Uint64x4

/* Greater */

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Mask8x16

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Mask8x32

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Mask8x64

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Mask16x8

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Mask16x16

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Mask16x32

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Mask32x4

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Mask32x8

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Mask32x16

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTQ, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Mask64x2

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Mask64x4

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Mask64x8

// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Mask8x64

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Mask16x32

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Mask32x16

// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Mask64x8

/* GreaterEqual */

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Mask8x64

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Mask16x32

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Mask32x16

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Mask64x8

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Mask8x64

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Mask16x32

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Mask32x16

// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Mask64x8

/* InterleaveHi */

// InterleaveHi interleaves the elements of the high halves of x and y.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// InterleaveHi interleaves the elements of the high halves of x and y.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// InterleaveHi interleaves the elements of the high halves of x and y.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// InterleaveHi interleaves the elements of the high halves of x and y.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// InterleaveHi interleaves the elements of the high halves of x and y.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// InterleaveHi interleaves the elements of the high halves of x and y.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

/* InterleaveHiGrouped */

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* InterleaveLo */

// InterleaveLo interleaves the elements of the low halves of x and y.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// InterleaveLo interleaves the elements of the low halves of x and y.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// InterleaveLo interleaves the elements of the low halves of x and y.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// InterleaveLo interleaves the elements of the low halves of x and y.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// InterleaveLo interleaves the elements of the low halves of x and y.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// InterleaveLo interleaves the elements of the low halves of x and y.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

/* InterleaveLoGrouped */

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* IsNan */

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

/* LeadingZeros */

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func ( Int32x4) () Int32x4

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func ( Int32x8) () Int32x8

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func ( Int32x16) () Int32x16

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func ( Int64x2) () Int64x2

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func ( Int64x4) () Int64x4

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func ( Int64x8) () Int64x8

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func ( Uint32x4) () Uint32x4

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func ( Uint32x8) () Uint32x8

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func ( Uint32x16) () Uint32x16

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func ( Uint64x2) () Uint64x2

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func ( Uint64x4) () Uint64x4

// LeadingZeros counts the leading zeros of each element in x.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func ( Uint64x8) () Uint64x8

/* Less */

// Less returns x less-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// Less returns x less-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// Less returns x less-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// Less returns x less-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// Less returns x less-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// Less returns x less-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Mask8x64

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Mask16x32

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Mask32x16

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Mask64x8

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Mask8x64

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Mask16x32

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Mask32x16

// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Mask64x8

/* LessEqual */

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Mask8x64

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Mask16x32

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Mask32x16

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Mask64x8

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Mask8x64

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Mask16x32

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Mask32x16

// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Mask64x8

/* Max */

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func ( Int64x2) ( Int64x2) Int64x2

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func ( Int64x4) ( Int64x4) Int64x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func ( Uint64x2) ( Uint64x2) Uint64x2

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4) Uint64x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* Min */

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512
func ( Int64x2) ( Int64x2) Int64x2

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512
func ( Int64x4) ( Int64x4) Int64x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512
func ( Uint64x2) ( Uint64x2) Uint64x2

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4) Uint64x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* Mul */

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLQ, CPU Feature: AVX512
func ( Int64x2) ( Int64x2) Int64x2

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLQ, CPU Feature: AVX512
func ( Int64x4) ( Int64x4) Int64x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLQ, CPU Feature: AVX512
func ( Uint64x2) ( Uint64x2) Uint64x2

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLQ, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4) Uint64x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VPMULLQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* MulAdd */

// MulAdd performs a fused (x * y) + z.
//
// Asm: VFMADD213PS, CPU Feature: AVX512
func ( Float32x4) ( Float32x4,  Float32x4) Float32x4

// MulAdd performs a fused (x * y) + z.
//
// Asm: VFMADD213PS, CPU Feature: AVX512
func ( Float32x8) ( Float32x8,  Float32x8) Float32x8

// MulAdd performs a fused (x * y) + z.
//
// Asm: VFMADD213PS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16,  Float32x16) Float32x16

// MulAdd performs a fused (x * y) + z.
//
// Asm: VFMADD213PD, CPU Feature: AVX512
func ( Float64x2) ( Float64x2,  Float64x2) Float64x2

// MulAdd performs a fused (x * y) + z.
//
// Asm: VFMADD213PD, CPU Feature: AVX512
func ( Float64x4) ( Float64x4,  Float64x4) Float64x4

// MulAdd performs a fused (x * y) + z.
//
// Asm: VFMADD213PD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8,  Float64x8) Float64x8

/* MulAddSub */

// MulAddSub performs a fused (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
func ( Float32x4) ( Float32x4,  Float32x4) Float32x4

// MulAddSub performs a fused (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
func ( Float32x8) ( Float32x8,  Float32x8) Float32x8

// MulAddSub performs a fused (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16,  Float32x16) Float32x16

// MulAddSub performs a fused (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
func ( Float64x2) ( Float64x2,  Float64x2) Float64x2

// MulAddSub performs a fused (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
func ( Float64x4) ( Float64x4,  Float64x4) Float64x4

// MulAddSub performs a fused (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8,  Float64x8) Float64x8

/* MulEvenWiden */

// MulEvenWiden multiplies even-indexed elements, widening the result.
// result[i] = x[2*i] * y[2*i], with each product held in an element twice as wide as the inputs.
//
// Asm: VPMULDQ, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int64x2

// MulEvenWiden multiplies even-indexed elements, widening the result.
// result[i] = x[2*i] * y[2*i], with each product held in an element twice as wide as the inputs.
//
// Asm: VPMULDQ, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int64x4

// MulEvenWiden multiplies even-indexed elements, widening the result.
// result[i] = x[2*i] * y[2*i], with each product held in an element twice as wide as the inputs.
//
// Asm: VPMULUDQ, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint64x2

// MulEvenWiden multiplies even-indexed elements, widening the result.
// result[i] = x[2*i] * y[2*i], with each product held in an element twice as wide as the inputs.
//
// Asm: VPMULUDQ, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint64x4

/* MulHigh */

// MulHigh multiplies elements and stores the high part of the result.
//
// Asm: VPMULHW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// MulHigh multiplies elements and stores the high part of the result.
//
// Asm: VPMULHW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// MulHigh multiplies elements and stores the high part of the result.
//
// Asm: VPMULHW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// MulHigh multiplies elements and stores the high part of the result.
//
// Asm: VPMULHUW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// MulHigh multiplies elements and stores the high part of the result.
//
// Asm: VPMULHUW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// MulHigh multiplies elements and stores the high part of the result.
//
// Asm: VPMULHUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

/* MulSubAdd */

// MulSubAdd performs a fused (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func ( Float32x4) ( Float32x4,  Float32x4) Float32x4

// MulSubAdd performs a fused (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func ( Float32x8) ( Float32x8,  Float32x8) Float32x8

// MulSubAdd performs a fused (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16,  Float32x16) Float32x16

// MulSubAdd performs a fused (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func ( Float64x2) ( Float64x2,  Float64x2) Float64x2

// MulSubAdd performs a fused (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func ( Float64x4) ( Float64x4,  Float64x4) Float64x4

// MulSubAdd performs a fused (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements.
// Element indices start at 0, so element 0 is even.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8,  Float64x8) Float64x8

/* NotEqual */

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Mask32x4

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Mask32x8

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Mask32x16

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Mask64x2

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Mask64x4

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Mask64x8

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Mask8x64

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Mask16x32

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Mask32x16

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Mask64x8

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Mask8x64

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Mask16x32

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Mask32x16

// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Mask64x8

/* OnesCount */

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func ( Int8x16) () Int8x16

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func ( Int8x32) () Int8x32

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func ( Int8x64) () Int8x64

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func ( Int16x8) () Int16x8

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func ( Int16x16) () Int16x16

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func ( Int16x32) () Int16x32

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func ( Int32x4) () Int32x4

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func ( Int32x8) () Int32x8

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func ( Int32x16) () Int32x16

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func ( Int64x2) () Int64x2

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func ( Int64x4) () Int64x4

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func ( Int64x8) () Int64x8

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func ( Uint8x16) () Uint8x16

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func ( Uint8x32) () Uint8x32

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func ( Uint8x64) () Uint8x64

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func ( Uint16x8) () Uint16x8

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func ( Uint16x16) () Uint16x16

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func ( Uint16x32) () Uint16x32

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func ( Uint32x4) () Uint32x4

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func ( Uint32x8) () Uint32x8

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func ( Uint32x16) () Uint32x16

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func ( Uint64x2) () Uint64x2

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func ( Uint64x4) () Uint64x4

// OnesCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func ( Uint64x8) () Uint64x8

/* Or */

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPOR, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// Or performs a bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* Permute */

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func ( Int8x16) ( Uint8x16) Int8x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func ( Uint8x16) ( Uint8x16) Uint8x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func ( Int8x32) ( Uint8x32) Int8x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func ( Uint8x32) ( Uint8x32) Uint8x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[63]]}
// Only the low 6 bits (values 0-63) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func ( Int8x64) ( Uint8x64) Int8x64

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[63]]}
// Only the low 6 bits (values 0-63) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func ( Uint8x64) ( Uint8x64) Uint8x64

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func ( Int16x8) ( Uint16x8) Int16x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func ( Uint16x8) ( Uint16x8) Uint16x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func ( Int16x16) ( Uint16x16) Int16x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func ( Uint16x16) ( Uint16x16) Uint16x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func ( Int16x32) ( Uint16x32) Int16x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMPS, CPU Feature: AVX2
func ( Float32x8) ( Uint32x8) Float32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX2
func ( Int32x8) ( Uint32x8) Int32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMPS, CPU Feature: AVX512
func ( Float32x16) ( Uint32x16) Float32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX512
func ( Int32x16) ( Uint32x16) Int32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[3]]}
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func ( Float64x4) ( Uint64x4) Float64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[3]]}
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func ( Int64x4) ( Uint64x4) Int64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[3]]}
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4) Uint64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func ( Float64x8) ( Uint64x8) Float64x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func ( Int64x8) ( Uint64x8) Int64x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* PermuteOrZero */

// PermuteOrZero performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set, in which case zero is used instead.
//
// Asm: VPSHUFB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// PermuteOrZero performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set, in which case zero is used instead.
//
// Asm: VPSHUFB, CPU Feature: AVX
func ( Uint8x16) ( Int8x16) Uint8x16

/* PermuteOrZeroGrouped */

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide, i.e. 16 byte-sized elements.
//
// Asm: VPSHUFB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide, i.e. 16 byte-sized elements.
//
// Asm: VPSHUFB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide, i.e. 16 byte-sized elements.
//
// Asm: VPSHUFB, CPU Feature: AVX2
func ( Uint8x32) ( Int8x32) Uint8x32

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide, i.e. 16 byte-sized elements.
//
// Asm: VPSHUFB, CPU Feature: AVX512
func ( Uint8x64) ( Int8x64) Uint8x64

/* Reciprocal */

// Reciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCPPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// Reciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCPPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// Reciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PS, CPU Feature: AVX512
func ( Float32x16) () Float32x16

// Reciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func ( Float64x2) () Float64x2

// Reciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func ( Float64x4) () Float64x4

// Reciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func ( Float64x8) () Float64x8

/* ReciprocalSqrt */

// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512
func ( Float32x16) () Float32x16

// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func ( Float64x2) () Float64x2

// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func ( Float64x4) () Float64x4

// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func ( Float64x8) () Float64x8

/* RotateAllLeft */

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func ( Int32x4) ( uint8) Int32x4

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func ( Int32x8) ( uint8) Int32x8

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func ( Int32x16) ( uint8) Int32x16

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func ( Int64x2) ( uint8) Int64x2

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func ( Int64x4) ( uint8) Int64x4

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func ( Int64x8) ( uint8) Int64x8

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func ( Uint32x4) ( uint8) Uint32x4

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func ( Uint32x8) ( uint8) Uint32x8

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func ( Uint32x16) ( uint8) Uint32x16

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func ( Uint64x2) ( uint8) Uint64x2

// RotateAllLeft rotates each element to the left by the same number of bits, given by shift.
//
// shift is encoded as the instruction's immediate: a constant shift gives better performance,
// while a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func ( Uint64x4) ( uint8) Uint64x4

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func ( Uint64x8) ( uint8) Uint64x8

/* RotateAllRight */

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func ( Int32x4) ( uint8) Int32x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func ( Int32x8) ( uint8) Int32x8

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func ( Int32x16) ( uint8) Int32x16

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func ( Int64x2) ( uint8) Int64x2

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func ( Int64x4) ( uint8) Int64x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func ( Int64x8) ( uint8) Int64x8

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func ( Uint32x4) ( uint8) Uint32x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func ( Uint32x8) ( uint8) Uint32x8

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func ( Uint32x16) ( uint8) Uint32x16

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func ( Uint64x2) ( uint8) Uint64x2

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func ( Uint64x4) ( uint8) Uint64x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func ( Uint64x8) ( uint8) Uint64x8

/* RotateLeft */

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512
func ( Int32x4) ( Int32x4) Int32x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512
func ( Int32x8) ( Int32x8) Int32x8

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func ( Int64x2) ( Int64x2) Int64x2

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func ( Int64x4) ( Int64x4) Int64x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512
func ( Uint32x4) ( Uint32x4) Uint32x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512
func ( Uint32x8) ( Uint32x8) Uint32x8

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func ( Uint64x2) ( Uint64x2) Uint64x2

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4) Uint64x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* RotateRight */

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512
func ( Int32x4) ( Int32x4) Int32x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512
func ( Int32x8) ( Int32x8) Int32x8

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func ( Int64x2) ( Int64x2) Int64x2

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func ( Int64x4) ( Int64x4) Int64x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512
func ( Uint32x4) ( Uint32x4) Uint32x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512
func ( Uint32x8) ( Uint32x8) Uint32x8

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func ( Uint64x2) ( Uint64x2) Uint64x2

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func ( Uint64x4) ( Uint64x4) Uint64x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* RoundToEven */

// RoundToEven rounds elements to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// RoundToEven rounds elements to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// RoundToEven rounds elements to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x2) () Float64x2

// RoundToEven rounds elements to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x4) () Float64x4

/* RoundToEvenScaled */

// RoundToEvenScaled rounds elements with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// RoundToEvenScaled rounds elements with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// RoundToEvenScaled rounds elements with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// RoundToEvenScaled rounds elements with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// RoundToEvenScaled rounds elements with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// RoundToEvenScaled rounds elements with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* RoundToEvenScaledResidue */

// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* SHA1FourRounds */

// SHA1FourRounds performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4.
// x contains the state variables a, b, c and d from upper to lower order.
// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
// result = the state variables a', b', c', d' updated after 4 rounds.
// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop.
//
// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: SHA1RNDS4, CPU Feature: SHA
func ( Uint32x4) ( uint8,  Uint32x4) Uint32x4

/* SHA1Message1 */

// SHA1Message1 does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4.
// x = {W3, W2, W1, W0}
// y = {0, 0, W5, W4}
// result = {W3^W5, W2^W4, W1^W3, W0^W2}.
//
// Asm: SHA1MSG1, CPU Feature: SHA
func ( Uint32x4) ( Uint32x4) Uint32x4

/* SHA1Message2 */

// SHA1Message2 does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4.
// x = result of 2.
// y = {W15, W14, W13}
// result = {W19, W18, W17, W16}
//
// Asm: SHA1MSG2, CPU Feature: SHA
func ( Uint32x4) ( Uint32x4) Uint32x4

/* SHA1NextE */

// SHA1NextE calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4.
// x contains the state variable a (before the 4 rounds), placed in the upper element.
// y is the elements of W array for next 4 rounds from upper to lower order.
// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
// from upper to lower order.
// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0
// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the
// computation of the value of e'.)
//
// Asm: SHA1NEXTE, CPU Feature: SHA
func ( Uint32x4) ( Uint32x4) Uint32x4

/* SHA256Message1 */

// SHA256Message1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
// x = {W0, W1, W2, W3}
// y = {W4, 0, 0, 0}
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
//
// Asm: SHA256MSG1, CPU Feature: SHA
func ( Uint32x4) ( Uint32x4) Uint32x4

/* SHA256Message2 */

// SHA256Message2 does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4.
// x = result of 2
// y = {0, 0, W14, W15}
// result = {W16, W17, W18, W19}
//
// Asm: SHA256MSG2, CPU Feature: SHA
func ( Uint32x4) ( Uint32x4) Uint32x4

/* SHA256TwoRounds */

// SHA256TwoRounds does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4.
// x = {h, g, d, c}
// y = {f, e, b, a}
// z = {W0+K0, W1+K1}
// result = {f', e', b', a'}
// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to
// the corresponding element of the W array to make the input data z.
// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
// y (the state variables a, b, e, f before the 2 rounds).
//
// Asm: SHA256RNDS2, CPU Feature: SHA
func ( Uint32x4) ( Uint32x4,  Uint32x4) Uint32x4

/* SaturateToInt8 */

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func ( Int16x8) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func ( Int16x16) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func ( Int16x32) () Int8x32

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func ( Int32x4) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func ( Int32x8) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func ( Int32x16) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func ( Int64x2) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func ( Int64x4) () Int8x16

// SaturateToInt8 converts element values to int8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func ( Int64x8) () Int8x16

/* SaturateToInt16 */

// SaturateToInt16 converts element values to int16.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func ( Int32x4) () Int16x8

// SaturateToInt16 converts element values to int16.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func ( Int32x8) () Int16x8

// SaturateToInt16 converts element values to int16.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func ( Int32x16) () Int16x16

// SaturateToInt16 converts element values to int16.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func ( Int64x2) () Int16x8

// SaturateToInt16 converts element values to int16.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func ( Int64x4) () Int16x8

// SaturateToInt16 converts element values to int16.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func ( Int64x8) () Int16x8

/* SaturateToInt16Concat */

// SaturateToInt16Concat converts element values to int16.
// With each 128-bit as a group:
// The converted group from the first input vector will be packed to the lower part of the result vector,
// the converted group from the second input vector will be packed to the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKSSDW, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int16x8

// SaturateToInt16Concat converts element values to int16.
// With each 128-bit as a group:
// The converted group from the first input vector will be packed to the lower part of the result vector,
// the converted group from the second input vector will be packed to the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKSSDW, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int16x16

// SaturateToInt16Concat converts element values to int16.
// With each 128-bit as a group:
// The converted group from the first input vector will be packed to the lower part of the result vector,
// the converted group from the second input vector will be packed to the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKSSDW, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int16x32

/* SaturateToInt32 */

// SaturateToInt32 converts element values to int32.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func ( Int64x2) () Int32x4

// SaturateToInt32 converts element values to int32.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func ( Int64x4) () Int32x4

// SaturateToInt32 converts element values to int32.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func ( Int64x8) () Int32x8

/* SaturateToUint8 */

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int16x8, returns Int8x16 and
// maps to VPMOVSWB, the signed-saturating instruction also listed for SaturateToInt8; only the
// Uint16x32 overload uses the unsigned VPMOVUSWB. Confirm whether the generator input intended
// Uint16x8 -> Uint8x16 with VPMOVUSWB here.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func ( Int16x8) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int16x16, returns Int8x16 and
// maps to VPMOVSWB, the signed-saturating instruction also listed for SaturateToInt8; only the
// Uint16x32 overload uses the unsigned VPMOVUSWB. Confirm whether the generator input intended
// Uint16x16 -> Uint8x16 with VPMOVUSWB here.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func ( Int16x16) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int32x4, returns Int8x16 and
// maps to VPMOVSDB, the signed-saturating instruction also listed for SaturateToInt8. The
// unsigned-saturating form would be VPMOVUSDB. Confirm whether the generator input intended
// Uint32x4 -> Uint8x16 here.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func ( Int32x4) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int32x8, returns Int8x16 and
// maps to VPMOVSDB, the signed-saturating instruction also listed for SaturateToInt8. The
// unsigned-saturating form would be VPMOVUSDB. Confirm whether the generator input intended
// Uint32x8 -> Uint8x16 here.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func ( Int32x8) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int32x16, returns Int8x16 and
// maps to VPMOVSDB, the signed-saturating instruction also listed for SaturateToInt8. The
// unsigned-saturating form would be VPMOVUSDB. Confirm whether the generator input intended
// Uint32x16 -> Uint8x16 here.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func ( Int32x16) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int64x2, returns Int8x16 and
// maps to VPMOVSQB, the signed-saturating instruction also listed for SaturateToInt8. The
// unsigned-saturating form would be VPMOVUSQB. Confirm whether the generator input intended
// Uint64x2 -> Uint8x16 here.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func ( Int64x2) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int64x4, returns Int8x16 and
// maps to VPMOVSQB, the signed-saturating instruction also listed for SaturateToInt8. The
// unsigned-saturating form would be VPMOVUSQB. Confirm whether the generator input intended
// Uint64x4 -> Uint8x16 here.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func ( Int64x4) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// NOTE(review): despite the name, this overload takes a signed Int64x8, returns Int8x16 and
// maps to VPMOVSQB, the signed-saturating instruction also listed for SaturateToInt8. The
// unsigned-saturating form would be VPMOVUSQB. Confirm whether the generator input intended
// Uint64x8 -> Uint8x16 here.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func ( Int64x8) () Int8x16

// SaturateToUint8 converts element values to uint8.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVUSWB, CPU Feature: AVX512
func ( Uint16x32) () Uint8x32

/* SaturateToUint16 */

// SaturateToUint16 converts element values to uint16.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func ( Uint32x4) () Uint16x8

// SaturateToUint16 converts element values to uint16.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func ( Uint32x8) () Uint16x8

// SaturateToUint16 converts element values to uint16.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func ( Uint32x16) () Uint16x16

// SaturateToUint16 converts element values to uint16.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func ( Uint64x2) () Uint16x8

// SaturateToUint16 converts element values to uint16.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func ( Uint64x4) () Uint16x8

// SaturateToUint16 converts element values to uint16.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func ( Uint64x8) () Uint16x8

/* SaturateToUint16Concat */

// SaturateToUint16Concat converts element values to uint16.
// With each 128-bit as a group:
// The converted group from the first input vector will be packed to the lower part of the result vector,
// the converted group from the second input vector will be packed to the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKUSDW, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint16x8

// SaturateToUint16Concat converts element values to uint16.
// With each 128-bit as a group:
// The converted group from the first input vector will be packed to the lower part of the result vector,
// the converted group from the second input vector will be packed to the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKUSDW, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint16x16

// SaturateToUint16Concat converts element values to uint16.
// With each 128-bit as a group:
// The converted group from the first input vector will be packed to the lower part of the result vector,
// the converted group from the second input vector will be packed to the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKUSDW, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint16x32

/* SaturateToUint32 */

// SaturateToUint32 converts element values to uint32.
// Conversion is done with saturation on the vector elements.
// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func ( Uint64x2) () Uint32x4

// SaturateToUint32 converts element values to uint32.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func ( Uint64x4) () Uint32x4

// SaturateToUint32 converts element values to uint32.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func ( Uint64x8) () Uint32x8

/* Scale */

// Scale multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func ( Float32x4) ( Float32x4) Float32x4

// Scale multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func ( Float32x8) ( Float32x8) Float32x8

// Scale multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Scale multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func ( Float64x2) ( Float64x2) Float64x2

// Scale multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func ( Float64x4) ( Float64x4) Float64x4

// Scale multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

/* Select128FromPair */

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// For example,
//
//	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
//
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2F128, CPU Feature: AVX
func ( Float32x8) (,  uint8,  Float32x8) Float32x8

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// For example,
//
//	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
//
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2F128, CPU Feature: AVX
func ( Float64x4) (,  uint8,  Float64x4) Float64x4

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// For example,
//
//	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
//	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
//
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
//
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Int8x32) (,  uint8,  Int8x32) Int8x32

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// For example,
//
//	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
//	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
//
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
//
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Int16x16) (,  uint8,  Int16x16) Int16x16

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// For example,
//
//	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
//
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Int32x8) (,  uint8,  Int32x8) Int32x8

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// Elements 0 and 1 are the lower and upper 128 bits of x; elements 2 and 3 are
// the lower and upper 128 bits of y. The element selected by lo becomes the
// lower half of the result and the element selected by hi the upper half.
// For example,
//
//	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
//
// x is the receiver, lo and hi are the two uint8 arguments (in that order),
// and y is the final vector argument.
//
// lo and hi give better performance when they are constants; non-constant values
// are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Int64x4) (,  uint8,  Int64x4) Int64x4

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// Elements 0 and 1 are the lower and upper 128 bits of x; elements 2 and 3 are
// the lower and upper 128 bits of y. The element selected by lo becomes the
// lower half of the result and the element selected by hi the upper half.
// For example,
//
//	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
//	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
//
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
//
// x is the receiver, lo and hi are the two uint8 arguments (in that order),
// and y is the final vector argument.
//
// lo and hi give better performance when they are constants; non-constant values
// are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Uint8x32) (,  uint8,  Uint8x32) Uint8x32

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// Elements 0 and 1 are the lower and upper 128 bits of x; elements 2 and 3 are
// the lower and upper 128 bits of y. The element selected by lo becomes the
// lower half of the result and the element selected by hi the upper half.
// For example,
//
//	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
//	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
//
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
//
// x is the receiver, lo and hi are the two uint8 arguments (in that order),
// and y is the final vector argument.
//
// lo and hi give better performance when they are constants; non-constant values
// are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Uint16x16) (,  uint8,  Uint16x16) Uint16x16

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// Elements 0 and 1 are the lower and upper 128 bits of x; elements 2 and 3 are
// the lower and upper 128 bits of y. The element selected by lo becomes the
// lower half of the result and the element selected by hi the upper half.
// For example,
//
//	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
//
// x is the receiver, lo and hi are the two uint8 arguments (in that order),
// and y is the final vector argument.
//
// lo and hi give better performance when they are constants; non-constant values
// are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Uint32x8) (,  uint8,  Uint32x8) Uint32x8

// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
// 128-bit elements, and returns a 256-bit result formed by
// concatenating the two elements specified by lo and hi.
// Elements 0 and 1 are the lower and upper 128 bits of x; elements 2 and 3 are
// the lower and upper 128 bits of y. The element selected by lo becomes the
// lower half of the result and the element selected by hi the upper half.
// For example,
//
//	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
//
// x is the receiver, lo and hi are the two uint8 arguments (in that order),
// and y is the final vector argument.
//
// lo and hi give better performance when they are constants; non-constant values
// are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func ( Uint64x4) (,  uint8,  Uint64x4) Uint64x4

/* SetElem */

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func ( Float32x4) ( uint8,  float32) Float32x4

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func ( Float64x2) ( uint8,  float64) Float64x2

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRB, CPU Feature: AVX
func ( Int8x16) ( uint8,  int8) Int8x16

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRW, CPU Feature: AVX
func ( Int16x8) ( uint8,  int16) Int16x8

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func ( Int32x4) ( uint8,  int32) Int32x4

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func ( Int64x2) ( uint8,  int64) Int64x2

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the first uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRB, CPU Feature: AVX
func ( Uint8x16) ( uint8,  uint8) Uint8x16

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRW, CPU Feature: AVX
func ( Uint16x8) ( uint8,  uint16) Uint16x8

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func ( Uint32x4) ( uint8,  uint32) Uint32x4

// SetElem sets the value of a single element, selected by index, and returns the resulting vector.
// index is the uint8 argument and the new element value is the second argument.
//
// index gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func ( Uint64x2) ( uint8,  uint64) Uint64x2

/* SetHi */

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF128, CPU Feature: AVX
func ( Float32x8) ( Float32x4) Float32x8

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func ( Float32x16) ( Float32x8) Float32x16

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF128, CPU Feature: AVX
func ( Float64x4) ( Float64x2) Float64x4

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func ( Float64x8) ( Float64x4) Float64x8

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int8x32) ( Int8x16) Int8x32

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int8x64) ( Int8x32) Int8x64

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int16x16) ( Int16x8) Int16x16

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int16x32) ( Int16x16) Int16x32

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int32x8) ( Int32x4) Int32x8

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int32x16) ( Int32x8) Int32x16

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int64x4) ( Int64x2) Int64x4

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int64x8) ( Int64x4) Int64x8

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x16) Uint8x32

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x32) Uint8x64

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x8) Uint16x16

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x16) Uint16x32

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x4) Uint32x8

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x8) Uint32x16

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x2) Uint64x4

// SetHi returns x with its upper half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x4) Uint64x8

/* SetLo */

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF128, CPU Feature: AVX
func ( Float32x8) ( Float32x4) Float32x8

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func ( Float32x16) ( Float32x8) Float32x16

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF128, CPU Feature: AVX
func ( Float64x4) ( Float64x2) Float64x4

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func ( Float64x8) ( Float64x4) Float64x8

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int8x32) ( Int8x16) Int8x32

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int8x64) ( Int8x32) Int8x64

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int16x16) ( Int16x8) Int16x16

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int16x32) ( Int16x16) Int16x32

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int32x8) ( Int32x4) Int32x8

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int32x16) ( Int32x8) Int32x16

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Int64x4) ( Int64x2) Int64x4

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Int64x8) ( Int64x4) Int64x8

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x16) Uint8x32

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x32) Uint8x64

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x8) Uint16x16

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x16) Uint16x32

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x4) Uint32x8

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x8) Uint32x16

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x2) Uint64x4

// SetLo returns x with its lower half set to y.
// x is the receiver; y is the half-width vector argument.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x4) Uint64x8

/* ShiftAllLeft */

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLW, CPU Feature: AVX
func ( Int16x8) ( uint64) Int16x8

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLW, CPU Feature: AVX2
func ( Int16x16) ( uint64) Int16x16

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLW, CPU Feature: AVX512
func ( Int16x32) ( uint64) Int16x32

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLD, CPU Feature: AVX
func ( Int32x4) ( uint64) Int32x4

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLD, CPU Feature: AVX2
func ( Int32x8) ( uint64) Int32x8

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLD, CPU Feature: AVX512
func ( Int32x16) ( uint64) Int32x16

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLQ, CPU Feature: AVX
func ( Int64x2) ( uint64) Int64x2

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLQ, CPU Feature: AVX2
func ( Int64x4) ( uint64) Int64x4

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLQ, CPU Feature: AVX512
func ( Int64x8) ( uint64) Int64x8

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLW, CPU Feature: AVX
func ( Uint16x8) ( uint64) Uint16x8

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLW, CPU Feature: AVX2
func ( Uint16x16) ( uint64) Uint16x16

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLW, CPU Feature: AVX512
func ( Uint16x32) ( uint64) Uint16x32

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLD, CPU Feature: AVX
func ( Uint32x4) ( uint64) Uint32x4

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLD, CPU Feature: AVX2
func ( Uint32x8) ( uint64) Uint32x8

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLD, CPU Feature: AVX512
func ( Uint32x16) ( uint64) Uint32x16

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLQ, CPU Feature: AVX
func ( Uint64x2) ( uint64) Uint64x2

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLQ, CPU Feature: AVX2
func ( Uint64x4) ( uint64) Uint64x4

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSLLQ, CPU Feature: AVX512
func ( Uint64x8) ( uint64) Uint64x8

/* ShiftAllLeftConcat */

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHLDW definition.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func ( Int16x8) ( uint8,  Int16x8) Int16x8

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHLDW definition.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func ( Int16x16) ( uint8,  Int16x16) Int16x16

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHLDW definition.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func ( Int16x32) ( uint8,  Int16x32) Int16x32

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func ( Int32x4) ( uint8,  Int32x4) Int32x4

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func ( Int32x8) ( uint8,  Int32x8) Int32x8

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func ( Int32x16) ( uint8,  Int32x16) Int32x16

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHLDQ definition.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func ( Int64x2) ( uint8,  Int64x2) Int64x2

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHLDQ definition.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func ( Int64x4) ( uint8,  Int64x4) Int64x4

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHLDQ definition.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func ( Int64x8) ( uint8,  Int64x8) Int64x8

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHLDW definition.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func ( Uint16x8) ( uint8,  Uint16x8) Uint16x8

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHLDW definition.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func ( Uint16x16) ( uint8,  Uint16x16) Uint16x16

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHLDW definition.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func ( Uint16x32) ( uint8,  Uint16x32) Uint16x32

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func ( Uint32x4) ( uint8,  Uint32x4) Uint32x4

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func ( Uint32x8) ( uint8,  Uint32x8) Uint32x8

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func ( Uint32x16) ( uint8,  Uint32x16) Uint32x16

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHLDQ definition.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func ( Uint64x2) ( uint8,  Uint64x2) Uint64x2

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHLDQ definition.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func ( Uint64x4) ( uint8,  Uint64x4) Uint64x4

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHLDQ definition.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func ( Uint64x8) ( uint8,  Uint64x8) Uint64x8

/* ShiftAllRight */

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAW, CPU Feature: AVX
func ( Int16x8) ( uint64) Int16x8

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAW, CPU Feature: AVX2
func ( Int16x16) ( uint64) Int16x16

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAW, CPU Feature: AVX512
func ( Int16x32) ( uint64) Int16x32

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAD, CPU Feature: AVX
func ( Int32x4) ( uint64) Int32x4

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAD, CPU Feature: AVX2
func ( Int32x8) ( uint64) Int32x8

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAD, CPU Feature: AVX512
func ( Int32x16) ( uint64) Int32x16

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func ( Int64x2) ( uint64) Int64x2

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func ( Int64x4) ( uint64) Int64x4

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func ( Int64x8) ( uint64) Int64x8

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLW, CPU Feature: AVX
func ( Uint16x8) ( uint64) Uint16x8

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLW, CPU Feature: AVX2
func ( Uint16x16) ( uint64) Uint16x16

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLW, CPU Feature: AVX512
func ( Uint16x32) ( uint64) Uint16x32

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLD, CPU Feature: AVX
func ( Uint32x4) ( uint64) Uint32x4

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLD, CPU Feature: AVX2
func ( Uint32x8) ( uint64) Uint32x8

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLD, CPU Feature: AVX512
func ( Uint32x16) ( uint64) Uint32x16

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLQ, CPU Feature: AVX
func ( Uint64x2) ( uint64) Uint64x2

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLQ, CPU Feature: AVX2
func ( Uint64x4) ( uint64) Uint64x4

// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
// The uint64 argument is the one shift count that is applied to every element of the receiver.
//
// Asm: VPSRLQ, CPU Feature: AVX512
func ( Uint64x8) ( uint64) Uint64x8

/* ShiftAllRightConcat */

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHRDW definition.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func ( Int16x8) ( uint8,  Int16x8) Int16x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHRDW definition.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func ( Int16x16) ( uint8,  Int16x16) Int16x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHRDW definition.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func ( Int16x32) ( uint8,  Int16x32) Int16x32

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func ( Int32x4) ( uint8,  Int32x4) Int32x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func ( Int32x8) ( uint8,  Int32x8) Int32x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func ( Int32x16) ( uint8,  Int32x16) Int32x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHRDQ definition.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func ( Int64x2) ( uint8,  Int64x2) Int64x2

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHRDQ definition.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func ( Int64x4) ( uint8,  Int64x4) Int64x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can only encode shift counts up to 31, less than the 64 bits of each
// element here; confirm the mask width against the VPSHRDQ definition.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func ( Int64x8) ( uint8,  Int64x8) Int64x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHRDW definition.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func ( Uint16x8) ( uint8,  Uint16x8) Uint16x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHRDW definition.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func ( Uint16x16) ( uint8,  Uint16x16) Uint16x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// NOTE(review): 5 bits can encode shift counts up to 31, more than the 16 bits of each
// element here; confirm the mask width against the VPSHRDW definition.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func ( Uint16x32) ( uint8,  Uint16x32) Uint16x32

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func ( Uint32x4) ( uint8,  Uint32x4) Uint32x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func ( Uint32x8) ( uint8,  Uint32x8) Uint32x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// x is the receiver, shift (the immediate) is the uint8 argument, and y is the vector argument.
//
// shift gives better performance when it is a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func ( Uint32x16) ( uint8,  Uint32x16) Uint32x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the
// lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func ( Uint64x2) ( uint8,  Uint64x2) Uint64x2

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the
// lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func ( Uint64x4) ( uint8,  Uint64x4) Uint64x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the
// lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func ( Uint64x8) ( uint8,  Uint64x8) Uint64x8

/* ShiftLeft */

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func ( Int16x8) ( Int16x8) Int16x8

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func ( Int16x16) ( Int16x16) Int16x16

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func ( Int32x4) ( Int32x4) Int32x4

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func ( Int64x2) ( Int64x2) Int64x2

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func ( Uint16x8) ( Uint16x8) Uint16x8

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func ( Uint16x16) ( Uint16x16) Uint16x16

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func ( Uint32x4) ( Uint32x4) Uint32x4

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func ( Uint64x2) ( Uint64x2) Uint64x2

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// ShiftLeft shifts each element of x to the left by the number of bits held in the corresponding element of y.
// Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* ShiftLeftConcat */

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func ( Int16x8) ( Int16x8,  Int16x8) Int16x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func ( Int16x16) ( Int16x16,  Int16x16) Int16x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func ( Int16x32) ( Int16x32,  Int16x32) Int16x32

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func ( Int32x4) ( Int32x4,  Int32x4) Int32x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func ( Int32x8) ( Int32x8,  Int32x8) Int32x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func ( Int32x16) ( Int32x16,  Int32x16) Int32x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func ( Int64x2) ( Int64x2,  Int64x2) Int64x2

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func ( Int64x4) ( Int64x4,  Int64x4) Int64x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func ( Int64x8) ( Int64x8,  Int64x8) Int64x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func ( Uint16x8) ( Uint16x8,  Uint16x8) Uint16x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func ( Uint16x16) ( Uint16x16,  Uint16x16) Uint16x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func ( Uint16x32) ( Uint16x32,  Uint16x32) Uint16x32

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func ( Uint32x4) ( Uint32x4,  Uint32x4) Uint32x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func ( Uint32x8) ( Uint32x8,  Uint32x8) Uint32x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func ( Uint32x16) ( Uint32x16,  Uint32x16) Uint32x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func ( Uint64x2) ( Uint64x2,  Uint64x2) Uint64x2

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func ( Uint64x4) ( Uint64x4,  Uint64x4) Uint64x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func ( Uint64x8) ( Uint64x8,  Uint64x8) Uint64x8

/* ShiftRight */

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVW, CPU Feature: AVX512
func ( Int16x8) ( Int16x8) Int16x8

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVW, CPU Feature: AVX512
func ( Int16x16) ( Int16x16) Int16x16

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVD, CPU Feature: AVX2
func ( Int32x4) ( Int32x4) Int32x4

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func ( Int64x2) ( Int64x2) Int64x2

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func ( Int64x4) ( Int64x4) Int64x4

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are filled with the sign bit (an arithmetic shift).
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVW, CPU Feature: AVX512
func ( Uint16x8) ( Uint16x8) Uint16x8

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVW, CPU Feature: AVX512
func ( Uint16x16) ( Uint16x16) Uint16x16

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVD, CPU Feature: AVX2
func ( Uint32x4) ( Uint32x4) Uint32x4

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func ( Uint64x2) ( Uint64x2) Uint64x2

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// ShiftRight shifts each element of x to the right by the number of bits held in the corresponding element of y.
// Emptied upper bits are zeroed (a logical shift).
//
// Asm: VPSRLVQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* ShiftRightConcat */

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func ( Int16x8) ( Int16x8,  Int16x8) Int16x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func ( Int16x16) ( Int16x16,  Int16x16) Int16x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func ( Int16x32) ( Int16x32,  Int16x32) Int16x32

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func ( Int32x4) ( Int32x4,  Int32x4) Int32x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func ( Int32x8) ( Int32x8,  Int32x8) Int32x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func ( Int32x16) ( Int32x16,  Int32x16) Int32x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func ( Int64x2) ( Int64x2,  Int64x2) Int64x2

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func ( Int64x4) ( Int64x4,  Int64x4) Int64x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func ( Int64x8) ( Int64x8,  Int64x8) Int64x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func ( Uint16x8) ( Uint16x8,  Uint16x8) Uint16x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func ( Uint16x16) ( Uint16x16,  Uint16x16) Uint16x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func ( Uint16x32) ( Uint16x32,  Uint16x32) Uint16x32

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func ( Uint32x4) ( Uint32x4,  Uint32x4) Uint32x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func ( Uint32x8) ( Uint32x8,  Uint32x8) Uint32x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func ( Uint32x16) ( Uint32x16,  Uint32x16) Uint32x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func ( Uint64x2) ( Uint64x2,  Uint64x2) Uint64x2

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func ( Uint64x4) ( Uint64x4,  Uint64x4) Uint64x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide),
// and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func ( Uint64x8) ( Uint64x8,  Uint64x8) Uint64x8

/* Sqrt */

// Sqrt returns a vector holding the square root of each element of the receiver.
//
// Asm: VSQRTPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// Sqrt returns a vector holding the square root of each element of the receiver.
//
// Asm: VSQRTPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// Sqrt returns a vector holding the square root of each element of the receiver.
//
// Asm: VSQRTPS, CPU Feature: AVX512
func ( Float32x16) () Float32x16

// Sqrt returns a vector holding the square root of each element of the receiver.
//
// Asm: VSQRTPD, CPU Feature: AVX
func ( Float64x2) () Float64x2

// Sqrt returns a vector holding the square root of each element of the receiver.
//
// Asm: VSQRTPD, CPU Feature: AVX
func ( Float64x4) () Float64x4

// Sqrt returns a vector holding the square root of each element of the receiver.
//
// Asm: VSQRTPD, CPU Feature: AVX512
func ( Float64x8) () Float64x8

/* Sub */

// Sub subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// Sub subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// Sub subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512
func ( Float32x16) ( Float32x16) Float32x16

// Sub subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// Sub subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// Sub subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512
func ( Float64x8) ( Float64x8) Float64x8

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBQ, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBQ, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBQ, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBQ, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// Sub subtracts corresponding elements of two vectors.
// Results that overflow the element type wrap around.
//
// Asm: VPSUBQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* SubPairs */

// NOTE(review): the result formula of the SubPairs variants below lists the pairs taken from y
// before the pairs taken from x; confirm this order against the operand order of the underlying
// horizontal-subtract instructions.

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
// Asm: VHSUBPS, CPU Feature: AVX
func ( Float32x4) ( Float32x4) Float32x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VHSUBPS, CPU Feature: AVX
func ( Float32x8) ( Float32x8) Float32x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
// Asm: VHSUBPD, CPU Feature: AVX
func ( Float64x2) ( Float64x2) Float64x2

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VHSUBPD, CPU Feature: AVX
func ( Float64x4) ( Float64x4) Float64x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
// Asm: VPHSUBW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VPHSUBW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
// Asm: VPHSUBD, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VPHSUBD, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
// Asm: VPHSUBW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VPHSUBW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
// Asm: VPHSUBD, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VPHSUBD, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

/* SubPairsSaturated */

// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// Differences that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPHSUBSW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// Differences that overflow are clamped to the minimum or maximum value of the signed element type.
// For this 256-bit vector the pattern applies independently to each 128-bit half of x, y and the result,
// not across the whole vector.
//
// Asm: VPHSUBSW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

/* SubSaturated */

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPSUBSB, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPSUBSB, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPSUBSB, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPSUBSW, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPSUBSW, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that overflow are clamped to the minimum or maximum value of the signed element type.
//
// Asm: VPSUBSW, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that would be negative are clamped to zero.
//
// Asm: VPSUBUSB, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that would be negative are clamped to zero.
//
// Asm: VPSUBUSB, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that would be negative are clamped to zero.
//
// Asm: VPSUBUSB, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that would be negative are clamped to zero.
//
// Asm: VPSUBUSW, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that would be negative are clamped to zero.
//
// Asm: VPSUBUSW, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results that would be negative are clamped to zero.
//
// Asm: VPSUBUSW, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

/* SumAbsDiff */

// SumAbsDiff computes the sum of absolute differences between corresponding bytes of the two
// input vectors, taking each adjacent 8 bytes as a group. The result is a vector of word-sized
// (16-bit) elements in which each 4*n-th element holds the sum for the n-th input group; all
// other elements of the result are zero.
// This can be viewed as the L1 distance between each pair of corresponding 8-byte groups of the two inputs.
//
// Asm: VPSADBW, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint16x8

// SumAbsDiff computes the sum of absolute differences between corresponding bytes of the two
// input vectors, taking each adjacent 8 bytes as a group. The result is a vector of word-sized
// (16-bit) elements in which each 4*n-th element holds the sum for the n-th input group; all
// other elements of the result are zero.
// This can be viewed as the L1 distance between each pair of corresponding 8-byte groups of the two inputs.
//
// Asm: VPSADBW, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint16x16

// SumAbsDiff computes the sum of absolute differences between corresponding bytes of the two
// input vectors, taking each adjacent 8 bytes as a group. The result is a vector of word-sized
// (16-bit) elements in which each 4*n-th element holds the sum for the n-th input group; all
// other elements of the result are zero.
// This can be viewed as the L1 distance between each pair of corresponding 8-byte groups of the two inputs.
//
// Asm: VPSADBW, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint16x32

/* Trunc */

// Trunc truncates each element towards zero, discarding its fractional part.
// The result has the same vector type as the receiver.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x4) () Float32x4

// Trunc truncates each element towards zero, discarding its fractional part.
// The result has the same vector type as the receiver.
//
// Asm: VROUNDPS, CPU Feature: AVX
func ( Float32x8) () Float32x8

// Trunc truncates each element towards zero, discarding its fractional part.
// The result has the same vector type as the receiver.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x2) () Float64x2

// Trunc truncates each element towards zero, discarding its fractional part.
// The result has the same vector type as the receiver.
//
// Asm: VROUNDPD, CPU Feature: AVX
func ( Float64x4) () Float64x4

/* TruncScaled */

// TruncScaled truncates elements with specified precision.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// TruncScaled truncates elements with specified precision.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// TruncScaled truncates elements with specified precision.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// TruncScaled truncates elements with specified precision.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// TruncScaled truncates elements with specified precision.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// TruncScaled truncates elements with specified precision.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* TruncScaledResidue */

// TruncScaledResidue computes the difference after truncating with specified precision.
// NOTE(review): presumably each element minus its TruncScaled value - confirm.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x4) ( uint8) Float32x4

// TruncScaledResidue computes the difference after truncating with specified precision.
// NOTE(review): presumably each element minus its TruncScaled value - confirm.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x8) ( uint8) Float32x8

// TruncScaledResidue computes the difference after truncating with specified precision.
// NOTE(review): presumably each element minus its TruncScaled value - confirm.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func ( Float32x16) ( uint8) Float32x16

// TruncScaledResidue computes the difference after truncating with specified precision.
// NOTE(review): presumably each element minus its TruncScaled value - confirm.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x2) ( uint8) Float64x2

// TruncScaledResidue computes the difference after truncating with specified precision.
// NOTE(review): presumably each element minus its TruncScaled value - confirm.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x4) ( uint8) Float64x4

// TruncScaledResidue computes the difference after truncating with specified precision.
// NOTE(review): presumably each element minus its TruncScaled value - confirm.
//
// prec, the uint8 argument, results in better performance when it is a constant;
// a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func ( Float64x8) ( uint8) Float64x8

/* TruncateToInt8 */

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func ( Int16x8) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 16 results fill the returned vector, so there are no upper elements left to clear.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func ( Int16x16) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 32 results fill the returned vector.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func ( Int16x32) () Int8x32

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func ( Int32x4) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func ( Int32x8) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 16 results fill the returned vector, so there are no upper elements left to clear.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func ( Int32x16) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 2 results are packed into the low elements of the returned vector; its upper 14 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func ( Int64x2) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func ( Int64x4) () Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func ( Int64x8) () Int8x16

/* TruncateToInt16 */

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// NOTE(review): only 4 results are produced for the 8-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func ( Int32x4) () Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// The 8 results fill the returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func ( Int32x8) () Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// The 16 results fill the returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func ( Int32x16) () Int16x16

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// NOTE(review): only 2 results are produced for the 8-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func ( Int64x2) () Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// NOTE(review): only 4 results are produced for the 8-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func ( Int64x4) () Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// The 8 results fill the returned vector.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func ( Int64x8) () Int16x8

/* TruncateToInt32 */

// TruncateToInt32 converts element values to int32.
// Conversion is done with truncation on the vector elements: only the low-order 32 bits of each element are kept.
// NOTE(review): only 2 results are produced for the 4-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func ( Int64x2) () Int32x4

// TruncateToInt32 converts element values to int32.
// Conversion is done with truncation on the vector elements: only the low-order 32 bits of each element are kept.
// The 4 results fill the returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func ( Int64x4) () Int32x4

// TruncateToInt32 converts element values to int32.
// Conversion is done with truncation on the vector elements: only the low-order 32 bits of each element are kept.
// The 8 results fill the returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func ( Int64x8) () Int32x8

/* TruncateToUint8 */

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func ( Uint16x8) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 16 results fill the returned vector, so there are no upper elements left to clear.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func ( Uint16x16) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 32 results fill the returned vector.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func ( Uint16x32) () Uint8x32

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func ( Uint32x4) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func ( Uint32x8) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 16 results fill the returned vector, so there are no upper elements left to clear.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func ( Uint32x16) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 2 results are packed into the low elements of the returned vector; its upper 14 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func ( Uint64x2) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func ( Uint64x4) () Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements: only the low-order 8 bits of each element are kept.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func ( Uint64x8) () Uint8x16

/* TruncateToUint16 */

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// NOTE(review): only 4 results are produced for the 8-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func ( Uint32x4) () Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// The 8 results fill the returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func ( Uint32x8) () Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// The 16 results fill the returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func ( Uint32x16) () Uint16x16

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// NOTE(review): only 2 results are produced for the 8-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func ( Uint64x2) () Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// NOTE(review): only 4 results are produced for the 8-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func ( Uint64x4) () Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements: only the low-order 16 bits of each element are kept.
// The 8 results fill the returned vector.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func ( Uint64x8) () Uint16x8

/* TruncateToUint32 */

// TruncateToUint32 converts element values to uint32.
// Conversion is done with truncation on the vector elements: only the low-order 32 bits of each element are kept.
// NOTE(review): only 2 results are produced for the 4-element result vector; presumably they are
// packed into the low elements and the upper elements are zero-cleared - confirm.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func ( Uint64x2) () Uint32x4

// TruncateToUint32 converts element values to uint32.
// Conversion is done with truncation on the vector elements: only the low-order 32 bits of each element are kept.
// The 4 results fill the returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func ( Uint64x4) () Uint32x4

// TruncateToUint32 converts element values to uint32.
// Conversion is done with truncation on the vector elements: only the low-order 32 bits of each element are kept.
// The 8 results fill the returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func ( Uint64x8) () Uint32x8

/* Xor */

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Int8x16) ( Int8x16) Int8x16

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Int8x32) ( Int8x32) Int8x32

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORD, CPU Feature: AVX512
func ( Int8x64) ( Int8x64) Int8x64

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Int16x8) ( Int16x8) Int16x8

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Int16x16) ( Int16x16) Int16x16

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORD, CPU Feature: AVX512
func ( Int16x32) ( Int16x32) Int16x32

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Int32x4) ( Int32x4) Int32x4

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Int32x8) ( Int32x8) Int32x8

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORD, CPU Feature: AVX512
func ( Int32x16) ( Int32x16) Int32x16

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Int64x2) ( Int64x2) Int64x2

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Int64x4) ( Int64x4) Int64x4

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORQ, CPU Feature: AVX512
func ( Int64x8) ( Int64x8) Int64x8

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Uint8x16) ( Uint8x16) Uint8x16

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Uint8x32) ( Uint8x32) Uint8x32

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORD, CPU Feature: AVX512
func ( Uint8x64) ( Uint8x64) Uint8x64

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Uint16x8) ( Uint16x8) Uint16x8

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Uint16x16) ( Uint16x16) Uint16x16

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORD, CPU Feature: AVX512
func ( Uint16x32) ( Uint16x32) Uint16x32

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Uint32x4) ( Uint32x4) Uint32x4

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Uint32x8) ( Uint32x8) Uint32x8

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORD, CPU Feature: AVX512
func ( Uint32x16) ( Uint32x16) Uint32x16

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX
func ( Uint64x2) ( Uint64x2) Uint64x2

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXOR, CPU Feature: AVX2
func ( Uint64x4) ( Uint64x4) Uint64x4

// Xor computes the bitwise XOR of two vectors; a result bit is 1 exactly when the two input bits differ.
//
// Asm: VPXORQ, CPU Feature: AVX512
func ( Uint64x8) ( Uint64x8) Uint64x8

/* Conversions from Float32x4 */

// NOTE(review): every result type in this group has the same total width as
// Float32x4 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float64x2 converts from Float32x4 to Float64x2.
func ( Float32x4) () ( Float64x2)

// Int8x16 converts from Float32x4 to Int8x16.
func ( Float32x4) () ( Int8x16)

// Int16x8 converts from Float32x4 to Int16x8.
func ( Float32x4) () ( Int16x8)

// Int32x4 converts from Float32x4 to Int32x4.
func ( Float32x4) () ( Int32x4)

// Int64x2 converts from Float32x4 to Int64x2.
func ( Float32x4) () ( Int64x2)

// Uint8x16 converts from Float32x4 to Uint8x16.
func ( Float32x4) () ( Uint8x16)

// Uint16x8 converts from Float32x4 to Uint16x8.
func ( Float32x4) () ( Uint16x8)

// Uint32x4 converts from Float32x4 to Uint32x4.
func ( Float32x4) () ( Uint32x4)

// Uint64x2 converts from Float32x4 to Uint64x2.
func ( Float32x4) () ( Uint64x2)

/* Conversions from Float32x8 */

// NOTE(review): every result type in this group has the same total width as
// Float32x8 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float64x4 converts from Float32x8 to Float64x4.
func ( Float32x8) () ( Float64x4)

// Int8x32 converts from Float32x8 to Int8x32.
func ( Float32x8) () ( Int8x32)

// Int16x16 converts from Float32x8 to Int16x16.
func ( Float32x8) () ( Int16x16)

// Int32x8 converts from Float32x8 to Int32x8.
func ( Float32x8) () ( Int32x8)

// Int64x4 converts from Float32x8 to Int64x4.
func ( Float32x8) () ( Int64x4)

// Uint8x32 converts from Float32x8 to Uint8x32.
func ( Float32x8) () ( Uint8x32)

// Uint16x16 converts from Float32x8 to Uint16x16.
func ( Float32x8) () ( Uint16x16)

// Uint32x8 converts from Float32x8 to Uint32x8.
func ( Float32x8) () ( Uint32x8)

// Uint64x4 converts from Float32x8 to Uint64x4.
func ( Float32x8) () ( Uint64x4)

/* Conversions from Float32x16 */

// NOTE(review): every result type in this group has the same total width as
// Float32x16 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float64x8 converts from Float32x16 to Float64x8.
func ( Float32x16) () ( Float64x8)

// Int8x64 converts from Float32x16 to Int8x64.
func ( Float32x16) () ( Int8x64)

// Int16x32 converts from Float32x16 to Int16x32.
func ( Float32x16) () ( Int16x32)

// Int32x16 converts from Float32x16 to Int32x16.
func ( Float32x16) () ( Int32x16)

// Int64x8 converts from Float32x16 to Int64x8.
func ( Float32x16) () ( Int64x8)

// Uint8x64 converts from Float32x16 to Uint8x64.
func ( Float32x16) () ( Uint8x64)

// Uint16x32 converts from Float32x16 to Uint16x32.
func ( Float32x16) () ( Uint16x32)

// Uint32x16 converts from Float32x16 to Uint32x16.
func ( Float32x16) () ( Uint32x16)

// Uint64x8 converts from Float32x16 to Uint64x8.
func ( Float32x16) () ( Uint64x8)

/* Conversions from Float64x2 */

// NOTE(review): every result type in this group has the same total width as
// Float64x2 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x4 converts from Float64x2 to Float32x4.
func ( Float64x2) () ( Float32x4)

// Int8x16 converts from Float64x2 to Int8x16.
func ( Float64x2) () ( Int8x16)

// Int16x8 converts from Float64x2 to Int16x8.
func ( Float64x2) () ( Int16x8)

// Int32x4 converts from Float64x2 to Int32x4.
func ( Float64x2) () ( Int32x4)

// Int64x2 converts from Float64x2 to Int64x2.
func ( Float64x2) () ( Int64x2)

// Uint8x16 converts from Float64x2 to Uint8x16.
func ( Float64x2) () ( Uint8x16)

// Uint16x8 converts from Float64x2 to Uint16x8.
func ( Float64x2) () ( Uint16x8)

// Uint32x4 converts from Float64x2 to Uint32x4.
func ( Float64x2) () ( Uint32x4)

// Uint64x2 converts from Float64x2 to Uint64x2.
func ( Float64x2) () ( Uint64x2)

/* Conversions from Float64x4 */

// NOTE(review): every result type in this group has the same total width as
// Float64x4 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x8 converts from Float64x4 to Float32x8.
func ( Float64x4) () ( Float32x8)

// Int8x32 converts from Float64x4 to Int8x32.
func ( Float64x4) () ( Int8x32)

// Int16x16 converts from Float64x4 to Int16x16.
func ( Float64x4) () ( Int16x16)

// Int32x8 converts from Float64x4 to Int32x8.
func ( Float64x4) () ( Int32x8)

// Int64x4 converts from Float64x4 to Int64x4.
func ( Float64x4) () ( Int64x4)

// Uint8x32 converts from Float64x4 to Uint8x32.
func ( Float64x4) () ( Uint8x32)

// Uint16x16 converts from Float64x4 to Uint16x16.
func ( Float64x4) () ( Uint16x16)

// Uint32x8 converts from Float64x4 to Uint32x8.
func ( Float64x4) () ( Uint32x8)

// Uint64x4 converts from Float64x4 to Uint64x4.
func ( Float64x4) () ( Uint64x4)

/* Conversions from Float64x8 */

// NOTE(review): every result type in this group has the same total width as
// Float64x8 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x16 converts from Float64x8 to Float32x16.
func ( Float64x8) () ( Float32x16)

// Int8x64 converts from Float64x8 to Int8x64.
func ( Float64x8) () ( Int8x64)

// Int16x32 converts from Float64x8 to Int16x32.
func ( Float64x8) () ( Int16x32)

// Int32x16 converts from Float64x8 to Int32x16.
func ( Float64x8) () ( Int32x16)

// Int64x8 converts from Float64x8 to Int64x8.
func ( Float64x8) () ( Int64x8)

// Uint8x64 converts from Float64x8 to Uint8x64.
func ( Float64x8) () ( Uint8x64)

// Uint16x32 converts from Float64x8 to Uint16x32.
func ( Float64x8) () ( Uint16x32)

// Uint32x16 converts from Float64x8 to Uint32x16.
func ( Float64x8) () ( Uint32x16)

// Uint64x8 converts from Float64x8 to Uint64x8.
func ( Float64x8) () ( Uint64x8)

/* Conversions from Int8x16 */

// NOTE(review): every result type in this group has the same total width as
// Int8x16 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x4 converts from Int8x16 to Float32x4.
func ( Int8x16) () ( Float32x4)

// Float64x2 converts from Int8x16 to Float64x2.
func ( Int8x16) () ( Float64x2)

// Int16x8 converts from Int8x16 to Int16x8.
func ( Int8x16) () ( Int16x8)

// Int32x4 converts from Int8x16 to Int32x4.
func ( Int8x16) () ( Int32x4)

// Int64x2 converts from Int8x16 to Int64x2.
func ( Int8x16) () ( Int64x2)

// Uint8x16 converts from Int8x16 to Uint8x16.
func ( Int8x16) () ( Uint8x16)

// Uint16x8 converts from Int8x16 to Uint16x8.
func ( Int8x16) () ( Uint16x8)

// Uint32x4 converts from Int8x16 to Uint32x4.
func ( Int8x16) () ( Uint32x4)

// Uint64x2 converts from Int8x16 to Uint64x2.
func ( Int8x16) () ( Uint64x2)

/* Conversions from Int8x32 */

// NOTE(review): every result type in this group has the same total width as
// Int8x32 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x8 converts from Int8x32 to Float32x8.
func ( Int8x32) () ( Float32x8)

// Float64x4 converts from Int8x32 to Float64x4.
func ( Int8x32) () ( Float64x4)

// Int16x16 converts from Int8x32 to Int16x16.
func ( Int8x32) () ( Int16x16)

// Int32x8 converts from Int8x32 to Int32x8.
func ( Int8x32) () ( Int32x8)

// Int64x4 converts from Int8x32 to Int64x4.
func ( Int8x32) () ( Int64x4)

// Uint8x32 converts from Int8x32 to Uint8x32.
func ( Int8x32) () ( Uint8x32)

// Uint16x16 converts from Int8x32 to Uint16x16.
func ( Int8x32) () ( Uint16x16)

// Uint32x8 converts from Int8x32 to Uint32x8.
func ( Int8x32) () ( Uint32x8)

// Uint64x4 converts from Int8x32 to Uint64x4.
func ( Int8x32) () ( Uint64x4)

/* Conversions from Int8x64 */

// NOTE(review): every result type in this group has the same total width as
// Int8x64 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x16 converts from Int8x64 to Float32x16.
func ( Int8x64) () ( Float32x16)

// Float64x8 converts from Int8x64 to Float64x8.
func ( Int8x64) () ( Float64x8)

// Int16x32 converts from Int8x64 to Int16x32.
func ( Int8x64) () ( Int16x32)

// Int32x16 converts from Int8x64 to Int32x16.
func ( Int8x64) () ( Int32x16)

// Int64x8 converts from Int8x64 to Int64x8.
func ( Int8x64) () ( Int64x8)

// Uint8x64 converts from Int8x64 to Uint8x64.
func ( Int8x64) () ( Uint8x64)

// Uint16x32 converts from Int8x64 to Uint16x32.
func ( Int8x64) () ( Uint16x32)

// Uint32x16 converts from Int8x64 to Uint32x16.
func ( Int8x64) () ( Uint32x16)

// Uint64x8 converts from Int8x64 to Uint64x8.
func ( Int8x64) () ( Uint64x8)

/* Conversions from Int16x8 */

// NOTE(review): every result type in this group has the same total width as
// Int16x8 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x4 converts from Int16x8 to Float32x4.
func ( Int16x8) () ( Float32x4)

// Float64x2 converts from Int16x8 to Float64x2.
func ( Int16x8) () ( Float64x2)

// Int8x16 converts from Int16x8 to Int8x16.
func ( Int16x8) () ( Int8x16)

// Int32x4 converts from Int16x8 to Int32x4.
func ( Int16x8) () ( Int32x4)

// Int64x2 converts from Int16x8 to Int64x2.
func ( Int16x8) () ( Int64x2)

// Uint8x16 converts from Int16x8 to Uint8x16.
func ( Int16x8) () ( Uint8x16)

// Uint16x8 converts from Int16x8 to Uint16x8.
func ( Int16x8) () ( Uint16x8)

// Uint32x4 converts from Int16x8 to Uint32x4.
func ( Int16x8) () ( Uint32x4)

// Uint64x2 converts from Int16x8 to Uint64x2.
func ( Int16x8) () ( Uint64x2)

/* Conversions from Int16x16 */

// NOTE(review): every result type in this group has the same total width as
// Int16x16 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x8 converts from Int16x16 to Float32x8.
func ( Int16x16) () ( Float32x8)

// Float64x4 converts from Int16x16 to Float64x4.
func ( Int16x16) () ( Float64x4)

// Int8x32 converts from Int16x16 to Int8x32.
func ( Int16x16) () ( Int8x32)

// Int32x8 converts from Int16x16 to Int32x8.
func ( Int16x16) () ( Int32x8)

// Int64x4 converts from Int16x16 to Int64x4.
func ( Int16x16) () ( Int64x4)

// Uint8x32 converts from Int16x16 to Uint8x32.
func ( Int16x16) () ( Uint8x32)

// Uint16x16 converts from Int16x16 to Uint16x16.
func ( Int16x16) () ( Uint16x16)

// Uint32x8 converts from Int16x16 to Uint32x8.
func ( Int16x16) () ( Uint32x8)

// Uint64x4 converts from Int16x16 to Uint64x4.
func ( Int16x16) () ( Uint64x4)

/* Conversions from Int16x32 */

// NOTE(review): every result type in this group has the same total width as
// Int16x32 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x16 converts from Int16x32 to Float32x16.
func ( Int16x32) () ( Float32x16)

// Float64x8 converts from Int16x32 to Float64x8.
func ( Int16x32) () ( Float64x8)

// Int8x64 converts from Int16x32 to Int8x64.
func ( Int16x32) () ( Int8x64)

// Int32x16 converts from Int16x32 to Int32x16.
func ( Int16x32) () ( Int32x16)

// Int64x8 converts from Int16x32 to Int64x8.
func ( Int16x32) () ( Int64x8)

// Uint8x64 converts from Int16x32 to Uint8x64.
func ( Int16x32) () ( Uint8x64)

// Uint16x32 converts from Int16x32 to Uint16x32.
func ( Int16x32) () ( Uint16x32)

// Uint32x16 converts from Int16x32 to Uint32x16.
func ( Int16x32) () ( Uint32x16)

// Uint64x8 converts from Int16x32 to Uint64x8.
func ( Int16x32) () ( Uint64x8)

/* Conversions from Int32x4 */

// NOTE(review): every result type in this group has the same total width as
// Int32x4 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x4 converts from Int32x4 to Float32x4.
func ( Int32x4) () ( Float32x4)

// Float64x2 converts from Int32x4 to Float64x2.
func ( Int32x4) () ( Float64x2)

// Int8x16 converts from Int32x4 to Int8x16.
func ( Int32x4) () ( Int8x16)

// Int16x8 converts from Int32x4 to Int16x8.
func ( Int32x4) () ( Int16x8)

// Int64x2 converts from Int32x4 to Int64x2.
func ( Int32x4) () ( Int64x2)

// Uint8x16 converts from Int32x4 to Uint8x16.
func ( Int32x4) () ( Uint8x16)

// Uint16x8 converts from Int32x4 to Uint16x8.
func ( Int32x4) () ( Uint16x8)

// Uint32x4 converts from Int32x4 to Uint32x4.
func ( Int32x4) () ( Uint32x4)

// Uint64x2 converts from Int32x4 to Uint64x2.
func ( Int32x4) () ( Uint64x2)

/* Conversions from Int32x8 */

// NOTE(review): every result type in this group has the same total width as
// Int32x8 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x8 converts from Int32x8 to Float32x8.
func ( Int32x8) () ( Float32x8)

// Float64x4 converts from Int32x8 to Float64x4.
func ( Int32x8) () ( Float64x4)

// Int8x32 converts from Int32x8 to Int8x32.
func ( Int32x8) () ( Int8x32)

// Int16x16 converts from Int32x8 to Int16x16.
func ( Int32x8) () ( Int16x16)

// Int64x4 converts from Int32x8 to Int64x4.
func ( Int32x8) () ( Int64x4)

// Uint8x32 converts from Int32x8 to Uint8x32.
func ( Int32x8) () ( Uint8x32)

// Uint16x16 converts from Int32x8 to Uint16x16.
func ( Int32x8) () ( Uint16x16)

// Uint32x8 converts from Int32x8 to Uint32x8.
func ( Int32x8) () ( Uint32x8)

// Uint64x4 converts from Int32x8 to Uint64x4.
func ( Int32x8) () ( Uint64x4)

/* Conversions from Int32x16 */

// NOTE(review): every result type in this group has the same total width as
// Int32x16 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x16 converts from Int32x16 to Float32x16.
func ( Int32x16) () ( Float32x16)

// Float64x8 converts from Int32x16 to Float64x8.
func ( Int32x16) () ( Float64x8)

// Int8x64 converts from Int32x16 to Int8x64.
func ( Int32x16) () ( Int8x64)

// Int16x32 converts from Int32x16 to Int16x32.
func ( Int32x16) () ( Int16x32)

// Int64x8 converts from Int32x16 to Int64x8.
func ( Int32x16) () ( Int64x8)

// Uint8x64 converts from Int32x16 to Uint8x64.
func ( Int32x16) () ( Uint8x64)

// Uint16x32 converts from Int32x16 to Uint16x32.
func ( Int32x16) () ( Uint16x32)

// Uint32x16 converts from Int32x16 to Uint32x16.
func ( Int32x16) () ( Uint32x16)

// Uint64x8 converts from Int32x16 to Uint64x8.
func ( Int32x16) () ( Uint64x8)

/* Conversions from Int64x2 */

// NOTE(review): every result type in this group has the same total width as
// Int64x2 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x4 converts from Int64x2 to Float32x4.
func ( Int64x2) () ( Float32x4)

// Float64x2 converts from Int64x2 to Float64x2.
func ( Int64x2) () ( Float64x2)

// Int8x16 converts from Int64x2 to Int8x16.
func ( Int64x2) () ( Int8x16)

// Int16x8 converts from Int64x2 to Int16x8.
func ( Int64x2) () ( Int16x8)

// Int32x4 converts from Int64x2 to Int32x4.
func ( Int64x2) () ( Int32x4)

// Uint8x16 converts from Int64x2 to Uint8x16.
func ( Int64x2) () ( Uint8x16)

// Uint16x8 converts from Int64x2 to Uint16x8.
func ( Int64x2) () ( Uint16x8)

// Uint32x4 converts from Int64x2 to Uint32x4.
func ( Int64x2) () ( Uint32x4)

// Uint64x2 converts from Int64x2 to Uint64x2.
func ( Int64x2) () ( Uint64x2)

/* Conversions from Int64x4 */

// NOTE(review): every result type in this group has the same total width as
// Int64x4 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x8 converts from Int64x4 to Float32x8.
func ( Int64x4) () ( Float32x8)

// Float64x4 converts from Int64x4 to Float64x4.
func ( Int64x4) () ( Float64x4)

// Int8x32 converts from Int64x4 to Int8x32.
func ( Int64x4) () ( Int8x32)

// Int16x16 converts from Int64x4 to Int16x16.
func ( Int64x4) () ( Int16x16)

// Int32x8 converts from Int64x4 to Int32x8.
func ( Int64x4) () ( Int32x8)

// Uint8x32 converts from Int64x4 to Uint8x32.
func ( Int64x4) () ( Uint8x32)

// Uint16x16 converts from Int64x4 to Uint16x16.
func ( Int64x4) () ( Uint16x16)

// Uint32x8 converts from Int64x4 to Uint32x8.
func ( Int64x4) () ( Uint32x8)

// Uint64x4 converts from Int64x4 to Uint64x4.
func ( Int64x4) () ( Uint64x4)

/* Conversions from Int64x8 */

// NOTE(review): every result type in this group has the same total width as
// Int64x8 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x16 converts from Int64x8 to Float32x16.
func ( Int64x8) () ( Float32x16)

// Float64x8 converts from Int64x8 to Float64x8.
func ( Int64x8) () ( Float64x8)

// Int8x64 converts from Int64x8 to Int8x64.
func ( Int64x8) () ( Int8x64)

// Int16x32 converts from Int64x8 to Int16x32.
func ( Int64x8) () ( Int16x32)

// Int32x16 converts from Int64x8 to Int32x16.
func ( Int64x8) () ( Int32x16)

// Uint8x64 converts from Int64x8 to Uint8x64.
func ( Int64x8) () ( Uint8x64)

// Uint16x32 converts from Int64x8 to Uint16x32.
func ( Int64x8) () ( Uint16x32)

// Uint32x16 converts from Int64x8 to Uint32x16.
func ( Int64x8) () ( Uint32x16)

// Uint64x8 converts from Int64x8 to Uint64x8.
func ( Int64x8) () ( Uint64x8)

/* Conversions from Uint8x16 */

// NOTE(review): every result type in this group has the same total width as
// Uint8x16 (128 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x4 converts from Uint8x16 to Float32x4.
func ( Uint8x16) () ( Float32x4)

// Float64x2 converts from Uint8x16 to Float64x2.
func ( Uint8x16) () ( Float64x2)

// Int8x16 converts from Uint8x16 to Int8x16.
func ( Uint8x16) () ( Int8x16)

// Int16x8 converts from Uint8x16 to Int16x8.
func ( Uint8x16) () ( Int16x8)

// Int32x4 converts from Uint8x16 to Int32x4.
func ( Uint8x16) () ( Int32x4)

// Int64x2 converts from Uint8x16 to Int64x2.
func ( Uint8x16) () ( Int64x2)

// Uint16x8 converts from Uint8x16 to Uint16x8.
func ( Uint8x16) () ( Uint16x8)

// Uint32x4 converts from Uint8x16 to Uint32x4.
func ( Uint8x16) () ( Uint32x4)

// Uint64x2 converts from Uint8x16 to Uint64x2.
func ( Uint8x16) () ( Uint64x2)

/* Conversions from Uint8x32 */

// NOTE(review): every result type in this group has the same total width as
// Uint8x32 (256 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x8 converts from Uint8x32 to Float32x8.
func ( Uint8x32) () ( Float32x8)

// Float64x4 converts from Uint8x32 to Float64x4.
func ( Uint8x32) () ( Float64x4)

// Int8x32 converts from Uint8x32 to Int8x32.
func ( Uint8x32) () ( Int8x32)

// Int16x16 converts from Uint8x32 to Int16x16.
func ( Uint8x32) () ( Int16x16)

// Int32x8 converts from Uint8x32 to Int32x8.
func ( Uint8x32) () ( Int32x8)

// Int64x4 converts from Uint8x32 to Int64x4.
func ( Uint8x32) () ( Int64x4)

// Uint16x16 converts from Uint8x32 to Uint16x16.
func ( Uint8x32) () ( Uint16x16)

// Uint32x8 converts from Uint8x32 to Uint32x8.
func ( Uint8x32) () ( Uint32x8)

// Uint64x4 converts from Uint8x32 to Uint64x4.
func ( Uint8x32) () ( Uint64x4)

/* Conversions from Uint8x64 */

// NOTE(review): every result type in this group has the same total width as
// Uint8x64 (512 bits), so these look like bit-level reinterpretations rather
// than per-element numeric conversions - confirm against the implementation.

// Float32x16 converts from Uint8x64 to Float32x16.
func ( Uint8x64) () ( Float32x16)

// Float64x8 converts from Uint8x64 to Float64x8.
func ( Uint8x64) () ( Float64x8)

// Int8x64 converts from Uint8x64 to Int8x64.
func ( Uint8x64) () ( Int8x64)

// Int16x32 converts from Uint8x64 to Int16x32.
func ( Uint8x64) () ( Int16x32)

// Int32x16 converts from Uint8x64 to Int32x16.
func ( Uint8x64) () ( Int32x16)

// Int64x8 converts from Uint8x64 to Int64x8.
func ( Uint8x64) () ( Int64x8)

// Uint16x32 converts from Uint8x64 to Uint16x32.
func ( Uint8x64) () ( Uint16x32)

// Uint32x16 converts from Uint8x64 to Uint32x16.
func ( Uint8x64) () ( Uint32x16)

// Uint64x8 converts from Uint8x64 to Uint64x8.
func ( Uint8x64) () ( Uint64x8)

// Conversions from Uint16x8. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 128 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x4 converts from Uint16x8 to Float32x4.
func ( Uint16x8) () ( Float32x4)

// Float64x2 converts from Uint16x8 to Float64x2.
func ( Uint16x8) () ( Float64x2)

// Int8x16 converts from Uint16x8 to Int8x16.
func ( Uint16x8) () ( Int8x16)

// Int16x8 converts from Uint16x8 to Int16x8.
func ( Uint16x8) () ( Int16x8)

// Int32x4 converts from Uint16x8 to Int32x4.
func ( Uint16x8) () ( Int32x4)

// Int64x2 converts from Uint16x8 to Int64x2.
func ( Uint16x8) () ( Int64x2)

// Uint8x16 converts from Uint16x8 to Uint8x16.
func ( Uint16x8) () ( Uint8x16)

// Uint32x4 converts from Uint16x8 to Uint32x4.
func ( Uint16x8) () ( Uint32x4)

// Uint64x2 converts from Uint16x8 to Uint64x2.
func ( Uint16x8) () ( Uint64x2)

// Conversions from Uint16x16. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 256 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x8 converts from Uint16x16 to Float32x8.
func ( Uint16x16) () ( Float32x8)

// Float64x4 converts from Uint16x16 to Float64x4.
func ( Uint16x16) () ( Float64x4)

// Int8x32 converts from Uint16x16 to Int8x32.
func ( Uint16x16) () ( Int8x32)

// Int16x16 converts from Uint16x16 to Int16x16.
func ( Uint16x16) () ( Int16x16)

// Int32x8 converts from Uint16x16 to Int32x8.
func ( Uint16x16) () ( Int32x8)

// Int64x4 converts from Uint16x16 to Int64x4.
func ( Uint16x16) () ( Int64x4)

// Uint8x32 converts from Uint16x16 to Uint8x32.
func ( Uint16x16) () ( Uint8x32)

// Uint32x8 converts from Uint16x16 to Uint32x8.
func ( Uint16x16) () ( Uint32x8)

// Uint64x4 converts from Uint16x16 to Uint64x4.
func ( Uint16x16) () ( Uint64x4)

// Conversions from Uint16x32. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 512 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x16 converts from Uint16x32 to Float32x16.
func ( Uint16x32) () ( Float32x16)

// Float64x8 converts from Uint16x32 to Float64x8.
func ( Uint16x32) () ( Float64x8)

// Int8x64 converts from Uint16x32 to Int8x64.
func ( Uint16x32) () ( Int8x64)

// Int16x32 converts from Uint16x32 to Int16x32.
func ( Uint16x32) () ( Int16x32)

// Int32x16 converts from Uint16x32 to Int32x16.
func ( Uint16x32) () ( Int32x16)

// Int64x8 converts from Uint16x32 to Int64x8.
func ( Uint16x32) () ( Int64x8)

// Uint8x64 converts from Uint16x32 to Uint8x64.
func ( Uint16x32) () ( Uint8x64)

// Uint32x16 converts from Uint16x32 to Uint32x16.
func ( Uint16x32) () ( Uint32x16)

// Uint64x8 converts from Uint16x32 to Uint64x8.
func ( Uint16x32) () ( Uint64x8)

// Conversions from Uint32x4. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 128 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x4 converts from Uint32x4 to Float32x4.
func ( Uint32x4) () ( Float32x4)

// Float64x2 converts from Uint32x4 to Float64x2.
func ( Uint32x4) () ( Float64x2)

// Int8x16 converts from Uint32x4 to Int8x16.
func ( Uint32x4) () ( Int8x16)

// Int16x8 converts from Uint32x4 to Int16x8.
func ( Uint32x4) () ( Int16x8)

// Int32x4 converts from Uint32x4 to Int32x4.
func ( Uint32x4) () ( Int32x4)

// Int64x2 converts from Uint32x4 to Int64x2.
func ( Uint32x4) () ( Int64x2)

// Uint8x16 converts from Uint32x4 to Uint8x16.
func ( Uint32x4) () ( Uint8x16)

// Uint16x8 converts from Uint32x4 to Uint16x8.
func ( Uint32x4) () ( Uint16x8)

// Uint64x2 converts from Uint32x4 to Uint64x2.
func ( Uint32x4) () ( Uint64x2)

// Conversions from Uint32x8. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 256 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x8 converts from Uint32x8 to Float32x8.
func ( Uint32x8) () ( Float32x8)

// Float64x4 converts from Uint32x8 to Float64x4.
func ( Uint32x8) () ( Float64x4)

// Int8x32 converts from Uint32x8 to Int8x32.
func ( Uint32x8) () ( Int8x32)

// Int16x16 converts from Uint32x8 to Int16x16.
func ( Uint32x8) () ( Int16x16)

// Int32x8 converts from Uint32x8 to Int32x8.
func ( Uint32x8) () ( Int32x8)

// Int64x4 converts from Uint32x8 to Int64x4.
func ( Uint32x8) () ( Int64x4)

// Uint8x32 converts from Uint32x8 to Uint8x32.
func ( Uint32x8) () ( Uint8x32)

// Uint16x16 converts from Uint32x8 to Uint16x16.
func ( Uint32x8) () ( Uint16x16)

// Uint64x4 converts from Uint32x8 to Uint64x4.
func ( Uint32x8) () ( Uint64x4)

// Conversions from Uint32x16. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 512 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x16 converts from Uint32x16 to Float32x16.
func ( Uint32x16) () ( Float32x16)

// Float64x8 converts from Uint32x16 to Float64x8.
func ( Uint32x16) () ( Float64x8)

// Int8x64 converts from Uint32x16 to Int8x64.
func ( Uint32x16) () ( Int8x64)

// Int16x32 converts from Uint32x16 to Int16x32.
func ( Uint32x16) () ( Int16x32)

// Int32x16 converts from Uint32x16 to Int32x16.
func ( Uint32x16) () ( Int32x16)

// Int64x8 converts from Uint32x16 to Int64x8.
func ( Uint32x16) () ( Int64x8)

// Uint8x64 converts from Uint32x16 to Uint8x64.
func ( Uint32x16) () ( Uint8x64)

// Uint16x32 converts from Uint32x16 to Uint16x32.
func ( Uint32x16) () ( Uint16x32)

// Uint64x8 converts from Uint32x16 to Uint64x8.
func ( Uint32x16) () ( Uint64x8)

// Conversions from Uint64x2. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 128 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x4 converts from Uint64x2 to Float32x4.
func ( Uint64x2) () ( Float32x4)

// Float64x2 converts from Uint64x2 to Float64x2.
func ( Uint64x2) () ( Float64x2)

// Int8x16 converts from Uint64x2 to Int8x16.
func ( Uint64x2) () ( Int8x16)

// Int16x8 converts from Uint64x2 to Int16x8.
func ( Uint64x2) () ( Int16x8)

// Int32x4 converts from Uint64x2 to Int32x4.
func ( Uint64x2) () ( Int32x4)

// Int64x2 converts from Uint64x2 to Int64x2.
func ( Uint64x2) () ( Int64x2)

// Uint8x16 converts from Uint64x2 to Uint8x16.
func ( Uint64x2) () ( Uint8x16)

// Uint16x8 converts from Uint64x2 to Uint16x8.
func ( Uint64x2) () ( Uint16x8)

// Uint32x4 converts from Uint64x2 to Uint32x4.
func ( Uint64x2) () ( Uint32x4)

// Conversions from Uint64x4. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 256 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x8 converts from Uint64x4 to Float32x8.
func ( Uint64x4) () ( Float32x8)

// Float64x4 converts from Uint64x4 to Float64x4.
func ( Uint64x4) () ( Float64x4)

// Int8x32 converts from Uint64x4 to Int8x32.
func ( Uint64x4) () ( Int8x32)

// Int16x16 converts from Uint64x4 to Int16x16.
func ( Uint64x4) () ( Int16x16)

// Int32x8 converts from Uint64x4 to Int32x8.
func ( Uint64x4) () ( Int32x8)

// Int64x4 converts from Uint64x4 to Int64x4.
func ( Uint64x4) () ( Int64x4)

// Uint8x32 converts from Uint64x4 to Uint8x32.
func ( Uint64x4) () ( Uint8x32)

// Uint16x16 converts from Uint64x4 to Uint16x16.
func ( Uint64x4) () ( Uint16x16)

// Uint32x8 converts from Uint64x4 to Uint32x8.
func ( Uint64x4) () ( Uint32x8)

// Conversions from Uint64x8. None of these declarations has a body, so the
// implementation is supplied outside this file.
// NOTE(review): going by the type names, the source and every result type
// here total 512 bits, which suggests the bits are reinterpreted rather than
// converted lane by lane - confirm.

// Float32x16 converts from Uint64x8 to Float32x16.
func ( Uint64x8) () ( Float32x16)

// Float64x8 converts from Uint64x8 to Float64x8.
func ( Uint64x8) () ( Float64x8)

// Int8x64 converts from Uint64x8 to Int8x64.
func ( Uint64x8) () ( Int8x64)

// Int16x32 converts from Uint64x8 to Int16x32.
func ( Uint64x8) () ( Int16x32)

// Int32x16 converts from Uint64x8 to Int32x16.
func ( Uint64x8) () ( Int32x16)

// Int64x8 converts from Uint64x8 to Int64x8.
func ( Uint64x8) () ( Int64x8)

// Uint8x64 converts from Uint64x8 to Uint8x64.
func ( Uint64x8) () ( Uint8x64)

// Uint16x32 converts from Uint64x8 to Uint16x32.
func ( Uint64x8) () ( Uint16x32)

// Uint32x16 converts from Uint64x8 to Uint32x16.
func ( Uint64x8) () ( Uint32x16)

// Mask8x16 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt8x16 converts from Mask8x16 to Int8x16.
// NOTE(review): how set and clear mask elements are encoded in the Int8x16
// lanes is not visible here - confirm.
func ( Mask8x16) () ( Int8x16)

// asMask converts from Int8x16 to Mask8x16.
// NOTE(review): which Int8x16 lane values count as set is not visible here -
// confirm.
func ( Int8x16) () ( Mask8x16)

// Takes a second Mask8x16 and returns a Mask8x16.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask8x16) ( Mask8x16) Mask8x16

// Takes a second Mask8x16 and returns a Mask8x16; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask8x16) ( Mask8x16) Mask8x16

// Mask8x32 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt8x32 converts from Mask8x32 to Int8x32.
// NOTE(review): how set and clear mask elements are encoded in the Int8x32
// lanes is not visible here - confirm.
func ( Mask8x32) () ( Int8x32)

// asMask converts from Int8x32 to Mask8x32.
// NOTE(review): which Int8x32 lane values count as set is not visible here -
// confirm.
func ( Int8x32) () ( Mask8x32)

// Takes a second Mask8x32 and returns a Mask8x32.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask8x32) ( Mask8x32) Mask8x32

// Takes a second Mask8x32 and returns a Mask8x32; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask8x32) ( Mask8x32) Mask8x32

// Mask8x64 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt8x64 converts from Mask8x64 to Int8x64.
// NOTE(review): how set and clear mask elements are encoded in the Int8x64
// lanes is not visible here - confirm.
func ( Mask8x64) () ( Int8x64)

// asMask converts from Int8x64 to Mask8x64.
// NOTE(review): which Int8x64 lane values count as set is not visible here -
// confirm.
func ( Int8x64) () ( Mask8x64)

// Takes a second Mask8x64 and returns a Mask8x64.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask8x64) ( Mask8x64) Mask8x64

// Takes a second Mask8x64 and returns a Mask8x64; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask8x64) ( Mask8x64) Mask8x64

// Mask16x8 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt16x8 converts from Mask16x8 to Int16x8.
// NOTE(review): how set and clear mask elements are encoded in the Int16x8
// lanes is not visible here - confirm.
func ( Mask16x8) () ( Int16x8)

// asMask converts from Int16x8 to Mask16x8.
// NOTE(review): which Int16x8 lane values count as set is not visible here -
// confirm.
func ( Int16x8) () ( Mask16x8)

// Takes a second Mask16x8 and returns a Mask16x8.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask16x8) ( Mask16x8) Mask16x8

// Takes a second Mask16x8 and returns a Mask16x8; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask16x8) ( Mask16x8) Mask16x8

// Mask16x16 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt16x16 converts from Mask16x16 to Int16x16.
// NOTE(review): how set and clear mask elements are encoded in the Int16x16
// lanes is not visible here - confirm.
func ( Mask16x16) () ( Int16x16)

// asMask converts from Int16x16 to Mask16x16.
// NOTE(review): which Int16x16 lane values count as set is not visible here -
// confirm.
func ( Int16x16) () ( Mask16x16)

// Takes a second Mask16x16 and returns a Mask16x16.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask16x16) ( Mask16x16) Mask16x16

// Takes a second Mask16x16 and returns a Mask16x16; the signature is
// identical to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask16x16) ( Mask16x16) Mask16x16

// Mask16x32 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt16x32 converts from Mask16x32 to Int16x32.
// NOTE(review): how set and clear mask elements are encoded in the Int16x32
// lanes is not visible here - confirm.
func ( Mask16x32) () ( Int16x32)

// asMask converts from Int16x32 to Mask16x32.
// NOTE(review): which Int16x32 lane values count as set is not visible here -
// confirm.
func ( Int16x32) () ( Mask16x32)

// Takes a second Mask16x32 and returns a Mask16x32.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask16x32) ( Mask16x32) Mask16x32

// Takes a second Mask16x32 and returns a Mask16x32; the signature is
// identical to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask16x32) ( Mask16x32) Mask16x32

// Mask32x4 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt32x4 converts from Mask32x4 to Int32x4.
// NOTE(review): how set and clear mask elements are encoded in the Int32x4
// lanes is not visible here - confirm.
func ( Mask32x4) () ( Int32x4)

// asMask converts from Int32x4 to Mask32x4.
// NOTE(review): which Int32x4 lane values count as set is not visible here -
// confirm.
func ( Int32x4) () ( Mask32x4)

// Takes a second Mask32x4 and returns a Mask32x4.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask32x4) ( Mask32x4) Mask32x4

// Takes a second Mask32x4 and returns a Mask32x4; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask32x4) ( Mask32x4) Mask32x4

// Mask32x8 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt32x8 converts from Mask32x8 to Int32x8.
// NOTE(review): how set and clear mask elements are encoded in the Int32x8
// lanes is not visible here - confirm.
func ( Mask32x8) () ( Int32x8)

// asMask converts from Int32x8 to Mask32x8.
// NOTE(review): which Int32x8 lane values count as set is not visible here -
// confirm.
func ( Int32x8) () ( Mask32x8)

// Takes a second Mask32x8 and returns a Mask32x8.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask32x8) ( Mask32x8) Mask32x8

// Takes a second Mask32x8 and returns a Mask32x8; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask32x8) ( Mask32x8) Mask32x8

// Mask32x16 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt32x16 converts from Mask32x16 to Int32x16.
// NOTE(review): how set and clear mask elements are encoded in the Int32x16
// lanes is not visible here - confirm.
func ( Mask32x16) () ( Int32x16)

// asMask converts from Int32x16 to Mask32x16.
// NOTE(review): which Int32x16 lane values count as set is not visible here -
// confirm.
func ( Int32x16) () ( Mask32x16)

// Takes a second Mask32x16 and returns a Mask32x16.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask32x16) ( Mask32x16) Mask32x16

// Takes a second Mask32x16 and returns a Mask32x16; the signature is
// identical to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask32x16) ( Mask32x16) Mask32x16

// Mask64x2 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt64x2 converts from Mask64x2 to Int64x2.
// NOTE(review): how set and clear mask elements are encoded in the Int64x2
// lanes is not visible here - confirm.
func ( Mask64x2) () ( Int64x2)

// asMask converts from Int64x2 to Mask64x2.
// NOTE(review): which Int64x2 lane values count as set is not visible here -
// confirm.
func ( Int64x2) () ( Mask64x2)

// Takes a second Mask64x2 and returns a Mask64x2.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask64x2) ( Mask64x2) Mask64x2

// Takes a second Mask64x2 and returns a Mask64x2; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask64x2) ( Mask64x2) Mask64x2

// Mask64x4 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt64x4 converts from Mask64x4 to Int64x4.
// NOTE(review): how set and clear mask elements are encoded in the Int64x4
// lanes is not visible here - confirm.
func ( Mask64x4) () ( Int64x4)

// asMask converts from Int64x4 to Mask64x4.
// NOTE(review): which Int64x4 lane values count as set is not visible here -
// confirm.
func ( Int64x4) () ( Mask64x4)

// Takes a second Mask64x4 and returns a Mask64x4.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask64x4) ( Mask64x4) Mask64x4

// Takes a second Mask64x4 and returns a Mask64x4; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask64x4) ( Mask64x4) Mask64x4

// Mask64x8 declarations. None of them has a body, so the implementation is
// supplied outside this file.

// ToInt64x8 converts from Mask64x8 to Int64x8.
// NOTE(review): how set and clear mask elements are encoded in the Int64x8
// lanes is not visible here - confirm.
func ( Mask64x8) () ( Int64x8)

// asMask converts from Int64x8 to Mask64x8.
// NOTE(review): which Int64x8 lane values count as set is not visible here -
// confirm.
func ( Int64x8) () ( Mask64x8)

// Takes a second Mask64x8 and returns a Mask64x8.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask64x8) ( Mask64x8) Mask64x8

// Takes a second Mask64x8 and returns a Mask64x8; the signature is identical
// to the declaration above.
// NOTE(review): undocumented in the generated output, and the signature alone
// does not say which mask operation this is - confirm.
func ( Mask64x8) ( Mask64x8) Mask64x8