Source: ops_amd64.go in package simd/archsimd

Source File
	ops_amd64.go

Belonging Package
	simd/archsimd

// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.

//go:build goexperiment.simd

package archsimd

/* AESDecryptLastRound */
// Section note: last AES decryption round, declared once per byte-vector width.

// AESDecryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// Asm: VAESDECLAST, CPU Feature: AVX, AES
func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16

// AESDecryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds two independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESDECLAST, CPU Feature: AVX512VAES
func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32

// AESDecryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds four independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESDECLAST, CPU Feature: AVX512VAES
func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64

/* AESDecryptOneRound */
// Section note: one (non-final) AES decryption round, declared once per byte-vector width.

// AESDecryptOneRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// Asm: VAESDEC, CPU Feature: AVX, AES
func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16

// AESDecryptOneRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds two independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESDEC, CPU Feature: AVX512VAES
func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32

// AESDecryptOneRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds four independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESDEC, CPU Feature: AVX512VAES
func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64

/* AESEncryptLastRound */
// Section note: last AES encryption round, declared once per byte-vector width.

// AESEncryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the w array in use.
// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
//
// Asm: VAESENCLAST, CPU Feature: AVX, AES
func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16

// AESEncryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the w array in use.
// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds two independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESENCLAST, CPU Feature: AVX512VAES
func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32

// AESEncryptLastRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the w array in use.
// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds four independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESENCLAST, CPU Feature: AVX512VAES
func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64

/* AESEncryptOneRound */
// Section note: one (non-final) AES encryption round, declared once per byte-vector width.

// AESEncryptOneRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENC, CPU Feature: AVX, AES
func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16

// AESEncryptOneRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds two independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESENC, CPU Feature: AVX512VAES
func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32

// AESEncryptOneRound performs a series of operations in the AES cipher algorithm defined in FIPS 197.
// x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
// y is the chunk of the w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// NOTE(review): a state is 16 bytes, so x presumably holds four independent states here,
// each combined with the matching 128 bits of y - confirm.
//
// Asm: VAESENC, CPU Feature: AVX512VAES
func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64

/* AESInvMixColumns */
// Section note: declared for the 128-bit Uint32x4 vector only.

// AESInvMixColumns performs the InvMixColumns operation in the AES cipher algorithm defined in FIPS 197.
// x is the chunk of the w array in use.
// result = InvMixColumns(x)
//
// Asm: VAESIMC, CPU Feature: AVX, AES
func (x Uint32x4) AESInvMixColumns() Uint32x4

/* AESRoundKeyGenAssist */
// Section note: key-expansion helper, declared for the 128-bit Uint32x4 vector only.

// AESRoundKeyGenAssist performs some components of KeyExpansion in the AES cipher algorithm defined in FIPS 197.
// x is an array of AES words, but only x[0] and x[2] are used.
// rconVal is a value from the Rcon constant array.
// result[0] = XOR(SubWord(RotWord(x[0])), rconVal)
// result[1] = SubWord(x[1])
// result[2] = XOR(SubWord(RotWord(x[2])), rconVal)
// result[3] = SubWord(x[3])
//
// NOTE(review): the text above says only x[0] and x[2] are read, yet the formulas
// for result[1] and result[3] read x[1] and x[3]. Each pair of results is presumably
// derived from a single input word; confirm which words against the
// VAESKEYGENASSIST pseudocode and make the two statements agree.
//
// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4

/* Abs */
// Section note: Abs is declared for signed integer vectors only. The 64-bit
// element forms (VPABSQ) require AVX512 at every vector width, whereas the
// 8/16/32-bit element forms need only AVX or AVX2 at 128 and 256 bits.
// NOTE(review): the most negative value presumably maps to itself - confirm.

// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX
func (x Int8x16) Abs() Int8x16

// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX2
func (x Int8x32) Abs() Int8x32

// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512
func (x Int8x64) Abs() Int8x64

// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX
func (x Int16x8) Abs() Int16x8

// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX2
func (x Int16x16) Abs() Int16x16

// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512
func (x Int16x32) Abs() Int16x32

// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX
func (x Int32x4) Abs() Int32x4

// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX2
func (x Int32x8) Abs() Int32x8

// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512
func (x Int32x16) Abs() Int32x16

// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512
func (x Int64x2) Abs() Int64x2

// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512
func (x Int64x4) Abs() Int64x4

// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512
func (x Int64x8) Abs() Int64x8

/* Add */
// Section note: Add is declared for float, signed integer and unsigned integer
// vectors at 128, 256 and 512 bits. Signed and unsigned variants of the same
// element width share one instruction (VPADDB/W/D/Q).
// NOTE(review): integer sums presumably wrap around on overflow, with
// AddSaturated being the clamping alternative - confirm.

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX
func (x Float32x4) Add(y Float32x4) Float32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX
func (x Float32x8) Add(y Float32x8) Float32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX512
func (x Float32x16) Add(y Float32x16) Float32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX
func (x Float64x2) Add(y Float64x2) Float64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX
func (x Float64x4) Add(y Float64x4) Float64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX512
func (x Float64x8) Add(y Float64x8) Float64x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX
func (x Int8x16) Add(y Int8x16) Int8x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX2
func (x Int8x32) Add(y Int8x32) Int8x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512
func (x Int8x64) Add(y Int8x64) Int8x64

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX
func (x Int16x8) Add(y Int16x8) Int16x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX2
func (x Int16x16) Add(y Int16x16) Int16x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512
func (x Int16x32) Add(y Int16x32) Int16x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX
func (x Int32x4) Add(y Int32x4) Int32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX2
func (x Int32x8) Add(y Int32x8) Int32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512
func (x Int32x16) Add(y Int32x16) Int32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX
func (x Int64x2) Add(y Int64x2) Int64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX2
func (x Int64x4) Add(y Int64x4) Int64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512
func (x Int64x8) Add(y Int64x8) Int64x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX
func (x Uint8x16) Add(y Uint8x16) Uint8x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX2
func (x Uint8x32) Add(y Uint8x32) Uint8x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512
func (x Uint8x64) Add(y Uint8x64) Uint8x64

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX
func (x Uint16x8) Add(y Uint16x8) Uint16x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX2
func (x Uint16x16) Add(y Uint16x16) Uint16x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512
func (x Uint16x32) Add(y Uint16x32) Uint16x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX
func (x Uint32x4) Add(y Uint32x4) Uint32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX2
func (x Uint32x8) Add(y Uint32x8) Uint32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512
func (x Uint32x16) Add(y Uint32x16) Uint32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX
func (x Uint64x2) Add(y Uint64x2) Uint64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX2
func (x Uint64x4) Add(y Uint64x4) Uint64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512
func (x Uint64x8) Add(y Uint64x8) Uint64x8

/* AddPairs */
// Section note: AddPairs is declared only for 128- and 256-bit vectors; no
// 512-bit or 8/64-bit integer element forms appear in this section.
// NOTE(review): the horizontal-add instructions are generally described as
// placing the sums of their first source in the low result elements; verify
// that the documented order (sums of y first, then sums of x) is what the
// intrinsic actually produces.

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VHADDPS, CPU Feature: AVX
func (x Float32x4) AddPairs(y Float32x4) Float32x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VHADDPS, CPU Feature: AVX
func (x Float32x8) AddPairs(y Float32x8) Float32x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VHADDPD, CPU Feature: AVX
func (x Float64x2) AddPairs(y Float64x2) Float64x2

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VHADDPD, CPU Feature: AVX
func (x Float64x4) AddPairs(y Float64x4) Float64x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX
func (x Int16x8) AddPairs(y Int16x8) Int16x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VPHADDW, CPU Feature: AVX2
func (x Int16x16) AddPairs(y Int16x16) Int16x16

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX
func (x Int32x4) AddPairs(y Int32x4) Int32x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VPHADDD, CPU Feature: AVX2
func (x Int32x8) AddPairs(y Int32x8) Int32x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX
func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VPHADDW, CPU Feature: AVX2
func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX
func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4

// AddPairs horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VPHADDD, CPU Feature: AVX2
func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8

/* AddPairsSaturated */
// Section note: declared for signed 16-bit elements only (VPHADDSW), at 128 and
// 256 bits. Each "+" in the result formula below is a saturating addition.
// NOTE(review): verify the documented operand order (sums of y first, then
// sums of x) against the instruction's definition.

// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX
func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8

// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
// NOTE(review): for this 256-bit receiver the pairing presumably applies within
// each 128-bit half separately rather than across the whole vector - confirm.
//
// Asm: VPHADDSW, CPU Feature: AVX2
func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16

/* AddSaturated */
// Section note: AddSaturated is declared only for 8- and 16-bit integer
// elements. Signed receivers use VPADDSB/VPADDSW and unsigned receivers use
// VPADDUSB/VPADDUSW, so the saturation bounds follow the element type's
// signedness.

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX
func (x Int8x16) AddSaturated(y Int8x16) Int8x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX2
func (x Int8x32) AddSaturated(y Int8x32) Int8x32

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512
func (x Int8x64) AddSaturated(y Int8x64) Int8x64

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX
func (x Int16x8) AddSaturated(y Int16x8) Int16x8

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX2
func (x Int16x16) AddSaturated(y Int16x16) Int16x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512
func (x Int16x32) AddSaturated(y Int16x32) Int16x32

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX
func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX2
func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX512
func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX
func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX2
func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16

// AddSaturated adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX512
func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32

/* AddSub */
// Section note: AddSub is declared only for 128- and 256-bit float vectors.
// NOTE(review): presumably result[i] = x[i] - y[i] for even i and
// x[i] + y[i] for odd i, with x as the minuend - confirm the operand order.

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPS, CPU Feature: AVX
func (x Float32x4) AddSub(y Float32x4) Float32x4

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPS, CPU Feature: AVX
func (x Float32x8) AddSub(y Float32x8) Float32x8

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPD, CPU Feature: AVX
func (x Float64x2) AddSub(y Float64x2) Float64x2

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPD, CPU Feature: AVX
func (x Float64x4) AddSub(y Float64x4) Float64x4

/* And */
// Section note: And is declared for every signed and unsigned integer vector
// type. The 128- and 256-bit forms all use VPAND; the 512-bit forms use VPANDD
// for 8-, 16- and 32-bit elements and VPANDQ for 64-bit elements.

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int8x16) And(y Int8x16) Int8x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int8x32) And(y Int8x32) Int8x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func (x Int8x64) And(y Int8x64) Int8x64

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int16x8) And(y Int16x8) Int16x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int16x16) And(y Int16x16) Int16x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func (x Int16x32) And(y Int16x32) Int16x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int32x4) And(y Int32x4) Int32x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int32x8) And(y Int32x8) Int32x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func (x Int32x16) And(y Int32x16) Int32x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int64x2) And(y Int64x2) Int64x2

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int64x4) And(y Int64x4) Int64x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512
func (x Int64x8) And(y Int64x8) Int64x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint8x16) And(y Uint8x16) Uint8x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint8x32) And(y Uint8x32) Uint8x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func (x Uint8x64) And(y Uint8x64) Uint8x64

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint16x8) And(y Uint16x8) Uint16x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint16x16) And(y Uint16x16) Uint16x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func (x Uint16x32) And(y Uint16x32) Uint16x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint32x4) And(y Uint32x4) Uint32x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint32x8) And(y Uint32x8) Uint32x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512
func (x Uint32x16) And(y Uint32x16) Uint32x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint64x2) And(y Uint64x2) Uint64x2

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint64x4) And(y Uint64x4) Uint64x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512
func (x Uint64x8) And(y Uint64x8) Uint64x8

/* AndNot */
// Section note: AndNot is declared for every signed and unsigned integer
// vector type. "x &^ y" is Go's AND NOT (bit clear) operator: the result keeps
// the bits of x that are not set in y. The 128- and 256-bit forms use VPANDN;
// the 512-bit forms use VPANDND for 8-, 16- and 32-bit elements and VPANDNQ
// for 64-bit elements.

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int8x16) AndNot(y Int8x16) Int8x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int8x32) AndNot(y Int8x32) Int8x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func (x Int8x64) AndNot(y Int8x64) Int8x64

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int16x8) AndNot(y Int16x8) Int16x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int16x16) AndNot(y Int16x16) Int16x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func (x Int16x32) AndNot(y Int16x32) Int16x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int32x4) AndNot(y Int32x4) Int32x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int32x8) AndNot(y Int32x8) Int32x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func (x Int32x16) AndNot(y Int32x16) Int32x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int64x2) AndNot(y Int64x2) Int64x2

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int64x4) AndNot(y Int64x4) Int64x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDNQ, CPU Feature: AVX512
func (x Int64x8) AndNot(y Int64x8) Int64x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint8x16) AndNot(y Uint8x16) Uint8x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint8x32) AndNot(y Uint8x32) Uint8x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func (x Uint8x64) AndNot(y Uint8x64) Uint8x64

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint16x8) AndNot(y Uint16x8) Uint16x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint16x16) AndNot(y Uint16x16) Uint16x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func (x Uint16x32) AndNot(y Uint16x32) Uint16x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint32x4) AndNot(y Uint32x4) Uint32x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint32x8) AndNot(y Uint32x8) Uint32x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512
func (x Uint32x16) AndNot(y Uint32x16) Uint32x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint64x2) AndNot(y Uint64x2) Uint64x2

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint64x4) AndNot(y Uint64x4) Uint64x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDNQ, CPU Feature: AVX512
func (x Uint64x8) AndNot(y Uint64x8) Uint64x8

/* Average */
// Section note: Average is declared only for unsigned 8- and 16-bit elements.
// NOTE(review): "rounded" presumably means halves round up, i.e.
// (x[i] + y[i] + 1) >> 1 computed without intermediate overflow - confirm.

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX
func (x Uint8x16) Average(y Uint8x16) Uint8x16

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX2
func (x Uint8x32) Average(y Uint8x32) Uint8x32

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX512
func (x Uint8x64) Average(y Uint8x64) Uint8x64

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX
func (x Uint16x8) Average(y Uint16x8) Uint16x8

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX2
func (x Uint16x16) Average(y Uint16x16) Uint16x16

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX512
func (x Uint16x32) Average(y Uint16x32) Uint16x32

/* Broadcast128 */
// Section note: every Broadcast128 variant requires AVX2 even though input and
// output are both 128-bit vectors. The Float64x2 form is listed with the
// integer instruction VPBROADCASTQ rather than a floating-point broadcast.

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast128() Float32x4

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Float64x2) Broadcast128() Float64x2

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast128() Int8x16

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast128() Int16x8

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast128() Int32x4

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast128() Int64x2

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast128() Uint8x16

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast128() Uint16x8

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast128() Uint32x4

// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast128() Uint64x2

/* Broadcast256 */
// Section note: each Broadcast256 variant takes a 128-bit receiver and returns
// the 256-bit vector with the same element type (twice as many elements). All
// variants require AVX2.

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast256() Float32x8

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VBROADCASTSD, CPU Feature: AVX2
func (x Float64x2) Broadcast256() Float64x4

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast256() Int8x32

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast256() Int16x16

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast256() Int32x8

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast256() Int64x4

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast256() Uint8x32

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast256() Uint16x16

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast256() Uint32x8

// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast256() Uint64x4

/* Broadcast512 */
// Section note: each Broadcast512 variant takes a 128-bit receiver and returns
// the 512-bit vector with the same element type (four times as many elements).
// All variants require AVX512.

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast512() Float32x16

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VBROADCASTSD, CPU Feature: AVX512
func (x Float64x2) Broadcast512() Float64x8

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast512() Int8x64

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast512() Int16x32

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast512() Int32x16

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Int64x2) Broadcast512() Int64x8

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast512() Uint8x64

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast512() Uint16x32

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast512() Uint32x16

// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Uint64x2) Broadcast512() Uint64x8

/* Ceil */
// Section note: Ceil is declared in this section only for 128- and 256-bit
// float vectors (VROUNDPS/VROUNDPD, AVX); no 512-bit form appears here. The
// result keeps the floating-point element type of the receiver.

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) Ceil() Float32x4

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) Ceil() Float32x8

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) Ceil() Float64x2

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Ceil() Float64x4

/* CeilScaled */

// CeilScaled rounds each element of x up, keeping the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x4) CeilScaled(prec uint8) Float32x4

// CeilScaled rounds each element of x up, keeping the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x8) CeilScaled(prec uint8) Float32x8

// CeilScaled rounds each element of x up, keeping the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x16) CeilScaled(prec uint8) Float32x16

// CeilScaled rounds each element of x up, keeping the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x2) CeilScaled(prec uint8) Float64x2

// CeilScaled rounds each element of x up, keeping the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x4) CeilScaled(prec uint8) Float64x4

// CeilScaled rounds each element of x up, keeping the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x8) CeilScaled(prec uint8) Float64x8

/* CeilScaledResidue */

// CeilScaledResidue computes, for each element of x, the difference that
// remains after rounding the element up with the precision specified by prec.
// NOTE(review): presumably x minus the CeilScaled result — confirm the sign
// convention against VREDUCEPS.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x4) CeilScaledResidue(prec uint8) Float32x4

// CeilScaledResidue computes, for each element of x, the difference that
// remains after rounding the element up with the precision specified by prec.
// NOTE(review): presumably x minus the CeilScaled result — confirm the sign
// convention against VREDUCEPS.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x8) CeilScaledResidue(prec uint8) Float32x8

// CeilScaledResidue computes, for each element of x, the difference that
// remains after rounding the element up with the precision specified by prec.
// NOTE(review): presumably x minus the CeilScaled result — confirm the sign
// convention against VREDUCEPS.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x16) CeilScaledResidue(prec uint8) Float32x16

// CeilScaledResidue computes, for each element of x, the difference that
// remains after rounding the element up with the precision specified by prec.
// NOTE(review): presumably x minus the CeilScaled result — confirm the sign
// convention against VREDUCEPD.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x2) CeilScaledResidue(prec uint8) Float64x2

// CeilScaledResidue computes, for each element of x, the difference that
// remains after rounding the element up with the precision specified by prec.
// NOTE(review): presumably x minus the CeilScaled result — confirm the sign
// convention against VREDUCEPD.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4

// CeilScaledResidue computes, for each element of x, the difference that
// remains after rounding the element up with the precision specified by prec.
// NOTE(review): presumably x minus the CeilScaled result — confirm the sign
// convention against VREDUCEPD.
//
// For best performance prec should be a constant; a non-constant value is
// translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8

/* Compress */

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512
func (x Float32x4) Compress(mask Mask32x4) Float32x4

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512
func (x Float32x8) Compress(mask Mask32x8) Float32x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512
func (x Float32x16) Compress(mask Mask32x16) Float32x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512
func (x Float64x2) Compress(mask Mask64x2) Float64x2

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512
func (x Float64x4) Compress(mask Mask64x4) Float64x4

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512
func (x Float64x8) Compress(mask Mask64x8) Float64x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x16) Compress(mask Mask8x16) Int8x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x32) Compress(mask Mask8x32) Int8x32

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x64) Compress(mask Mask8x64) Int8x64

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x8) Compress(mask Mask16x8) Int16x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x16) Compress(mask Mask16x16) Int16x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x32) Compress(mask Mask16x32) Int16x32

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func (x Int32x4) Compress(mask Mask32x4) Int32x4

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func (x Int32x8) Compress(mask Mask32x8) Int32x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func (x Int32x16) Compress(mask Mask32x16) Int32x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Int64x2) Compress(mask Mask64x2) Int64x2

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Int64x4) Compress(mask Mask64x4) Int64x4

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Int64x8) Compress(mask Mask64x8) Int64x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x16) Compress(mask Mask8x16) Uint8x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x32) Compress(mask Mask8x32) Uint8x32

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x64) Compress(mask Mask8x64) Uint8x64

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x8) Compress(mask Mask16x8) Uint16x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x16) Compress(mask Mask16x16) Uint16x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x32) Compress(mask Mask16x32) Uint16x32

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func (x Uint32x4) Compress(mask Mask32x4) Uint32x4

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func (x Uint32x8) Compress(mask Mask32x8) Uint32x8

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512
func (x Uint32x16) Compress(mask Mask32x16) Uint32x16

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Uint64x2) Compress(mask Mask64x2) Uint64x2

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Uint64x4) Compress(mask Mask64x4) Uint64x4

// Compress selects the elements of x indicated by mask and packs them into
// the lowest-indexed elements of the result.
// NOTE(review): the contents of the remaining upper result elements are not
// stated here (presumably zero) — confirm.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8

/* ConcatPermute */

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 64-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 64-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 128-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 128-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 64-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 64-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2W, CPU Feature: AVX512
func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 8-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2PS, CPU Feature: AVX512
func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 8-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 8-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2PS, CPU Feature: AVX512
func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2PS, CPU Feature: AVX512
func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 32-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2D, CPU Feature: AVX512
func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 4-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2PD, CPU Feature: AVX512
func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 4-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 4-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 8-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2PD, CPU Feature: AVX512
func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 8-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 8-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2PD, CPU Feature: AVX512
func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8

// ConcatPermute performs a full permutation of the vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the 16-element concatenation of x (lower half) and y (upper half)
// and n is presumably the index of the last element of indices (TODO confirm).
// Only the bits needed to represent an index into xy are used from each
// element of indices.
//
// Asm: VPERMI2Q, CPU Feature: AVX512
func (x Uint64x8) ConcatPermute(y Uint64x8, indices Uint64x8) Uint64x8

/* ConcatShiftBytesRight */

// ConcatShiftBytesRight concatenates x and y and shifts the concatenation
// right by constant bytes.
// The result vector is the lower half of the shifted concatenation.
// NOTE(review): this comment does not say whether x or y forms the upper half
// of the concatenation — confirm against VPALIGNR's operand order.
//
// For best performance constant should be a compile-time constant; a
// non-constant value is translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX
func (x Uint8x16) ConcatShiftBytesRight(constant uint8, y Uint8x16) Uint8x16

/* ConcatShiftBytesRightGrouped */

// ConcatShiftBytesRightGrouped concatenates x and y and shifts the
// concatenation right by constant bytes.
// The result vector is the lower half of the shifted concatenation.
// The operation is performed separately on each 16-byte group.
// NOTE(review): this comment does not say whether x or y forms the upper half
// of the concatenation — confirm against VPALIGNR's operand order.
//
// For best performance constant should be a compile-time constant; a
// non-constant value is translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX2
func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8x32

// ConcatShiftBytesRightGrouped concatenates x and y and shifts the
// concatenation right by constant bytes.
// The result vector is the lower half of the shifted concatenation.
// The operation is performed separately on each 16-byte group.
// NOTE(review): this comment does not say whether x or y forms the upper half
// of the concatenation — confirm against VPALIGNR's operand order.
//
// For best performance constant should be a compile-time constant; a
// non-constant value is translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX512
func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64

/* ConvertToFloat32 */

// ConvertToFloat32 converts each float64 element of x to float32.
// The result vector's elements are rounded to the nearest value.
// NOTE(review): x has 2 elements but the result has 4; the upper two result
// elements are presumably zero — confirm.
//
// Asm: VCVTPD2PSX, CPU Feature: AVX
func (x Float64x2) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 4 float64 elements of x to float32.
// The result vector's elements are rounded to the nearest value.
//
// Asm: VCVTPD2PSY, CPU Feature: AVX
func (x Float64x4) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 8 float64 elements of x to float32.
// The result vector's elements are rounded to the nearest value.
//
// Asm: VCVTPD2PS, CPU Feature: AVX512
func (x Float64x8) ConvertToFloat32() Float32x8

// ConvertToFloat32 converts each of the 4 int32 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTDQ2PS, CPU Feature: AVX
func (x Int32x4) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 8 int32 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTDQ2PS, CPU Feature: AVX
func (x Int32x8) ConvertToFloat32() Float32x8

// ConvertToFloat32 converts each of the 16 int32 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTDQ2PS, CPU Feature: AVX512
func (x Int32x16) ConvertToFloat32() Float32x16

// ConvertToFloat32 converts each int64 element of x to float32.
// NOTE(review): x has 2 elements but the result has 4; the upper two result
// elements are presumably zero — confirm.
//
// Asm: VCVTQQ2PSX, CPU Feature: AVX512
func (x Int64x2) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 4 int64 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTQQ2PSY, CPU Feature: AVX512
func (x Int64x4) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 8 int64 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTQQ2PS, CPU Feature: AVX512
func (x Int64x8) ConvertToFloat32() Float32x8

// ConvertToFloat32 converts each of the 4 uint32 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTUDQ2PS, CPU Feature: AVX512
func (x Uint32x4) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 8 uint32 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTUDQ2PS, CPU Feature: AVX512
func (x Uint32x8) ConvertToFloat32() Float32x8

// ConvertToFloat32 converts each of the 16 uint32 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTUDQ2PS, CPU Feature: AVX512
func (x Uint32x16) ConvertToFloat32() Float32x16

// ConvertToFloat32 converts each uint64 element of x to float32.
// NOTE(review): x has 2 elements but the result has 4; the upper two result
// elements are presumably zero — confirm.
//
// Asm: VCVTUQQ2PSX, CPU Feature: AVX512
func (x Uint64x2) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 4 uint64 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTUQQ2PSY, CPU Feature: AVX512
func (x Uint64x4) ConvertToFloat32() Float32x4

// ConvertToFloat32 converts each of the 8 uint64 elements of x to float32,
// producing a vector with the same number of elements.
//
// Asm: VCVTUQQ2PS, CPU Feature: AVX512
func (x Uint64x8) ConvertToFloat32() Float32x8

/* ConvertToFloat64 */

// ConvertToFloat64 converts each of the 4 float32 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTPS2PD, CPU Feature: AVX
func (x Float32x4) ConvertToFloat64() Float64x4

// ConvertToFloat64 converts each of the 8 float32 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTPS2PD, CPU Feature: AVX512
func (x Float32x8) ConvertToFloat64() Float64x8

// ConvertToFloat64 converts each of the 4 int32 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTDQ2PD, CPU Feature: AVX
func (x Int32x4) ConvertToFloat64() Float64x4

// ConvertToFloat64 converts each of the 8 int32 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTDQ2PD, CPU Feature: AVX512
func (x Int32x8) ConvertToFloat64() Float64x8

// ConvertToFloat64 converts each of the 2 int64 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTQQ2PD, CPU Feature: AVX512
func (x Int64x2) ConvertToFloat64() Float64x2

// ConvertToFloat64 converts each of the 4 int64 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTQQ2PD, CPU Feature: AVX512
func (x Int64x4) ConvertToFloat64() Float64x4

// ConvertToFloat64 converts each of the 8 int64 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTQQ2PD, CPU Feature: AVX512
func (x Int64x8) ConvertToFloat64() Float64x8

// ConvertToFloat64 converts each of the 4 uint32 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTUDQ2PD, CPU Feature: AVX512
func (x Uint32x4) ConvertToFloat64() Float64x4

// ConvertToFloat64 converts each of the 8 uint32 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTUDQ2PD, CPU Feature: AVX512
func (x Uint32x8) ConvertToFloat64() Float64x8

// ConvertToFloat64 converts each of the 2 uint64 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTUQQ2PD, CPU Feature: AVX512
func (x Uint64x2) ConvertToFloat64() Float64x2

// ConvertToFloat64 converts each of the 4 uint64 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTUQQ2PD, CPU Feature: AVX512
func (x Uint64x4) ConvertToFloat64() Float64x4

// ConvertToFloat64 converts each of the 8 uint64 elements of x to float64,
// producing a vector with the same number of elements.
//
// Asm: VCVTUQQ2PD, CPU Feature: AVX512
func (x Uint64x8) ConvertToFloat64() Float64x8

/* ConvertToInt32 */

// ConvertToInt32 converts each of the 4 float32 elements of x to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX
func (x Float32x4) ConvertToInt32() Int32x4

// ConvertToInt32 converts each of the 8 float32 elements of x to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX
func (x Float32x8) ConvertToInt32() Int32x8

// ConvertToInt32 converts each of the 16 float32 elements of x to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX512
func (x Float32x16) ConvertToInt32() Int32x16

// ConvertToInt32 converts each float64 element of x to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
// NOTE(review): x has 2 elements but the result has 4; the upper two result
// elements are presumably zero — confirm.
//
// Asm: VCVTTPD2DQX, CPU Feature: AVX
func (x Float64x2) ConvertToInt32() Int32x4

// ConvertToInt32 converts each of the 4 float64 elements of x to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2DQY, CPU Feature: AVX
func (x Float64x4) ConvertToInt32() Int32x4

// ConvertToInt32 converts each of the 8 float64 elements of x to int32.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int32, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2DQ, CPU Feature: AVX512
func (x Float64x8) ConvertToInt32() Int32x8

/* ConvertToInt64 */

// ConvertToInt64 converts each of the 4 float32 elements of x to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2QQ, CPU Feature: AVX512
func (x Float32x4) ConvertToInt64() Int64x4

// ConvertToInt64 converts each of the 8 float32 elements of x to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPS2QQ, CPU Feature: AVX512
func (x Float32x8) ConvertToInt64() Int64x8

// ConvertToInt64 converts each of the 2 float64 elements of x to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2QQ, CPU Feature: AVX512
func (x Float64x2) ConvertToInt64() Int64x2

// ConvertToInt64 converts each of the 4 float64 elements of x to int64.
// When a conversion is inexact, a truncated (round toward zero) value is returned.
// If a converted result cannot be represented in int64, an implementation-defined
// architecture-specific value is returned.
//
// Asm: VCVTTPD2QQ, CPU Feature: AVX512
func (x Float64x4) ConvertToInt64() Int64x4

// ConvertToInt64 converts each element of x to int64, returning the results as an Int64x8.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in int64, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPD2QQ, CPU Feature: AVX512
func (x Float64x8) ConvertToInt64() Int64x8

/* ConvertToUint32 */

// ConvertToUint32 converts each element of x to uint32, returning the results as a Uint32x4.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint32, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPS2UDQ, CPU Feature: AVX512
func (x Float32x4) ConvertToUint32() Uint32x4

// ConvertToUint32 converts each element of x to uint32, returning the results as a Uint32x8.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint32, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPS2UDQ, CPU Feature: AVX512
func (x Float32x8) ConvertToUint32() Uint32x8

// ConvertToUint32 converts each element of x to uint32, returning the results as a Uint32x16.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint32, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPS2UDQ, CPU Feature: AVX512
func (x Float32x16) ConvertToUint32() Uint32x16

// ConvertToUint32 converts each element of x to uint32, returning the results in a Uint32x4.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint32, an implementation-defined,
// architecture-specific value is returned for that element.
// NOTE(review): the result type (Uint32x4) appears to have more elements than x
// (Float64x2); what the extra elements hold is not stated here. Presumably they
// are zero; TODO confirm against VCVTTPD2UDQX.
//
// Asm: VCVTTPD2UDQX, CPU Feature: AVX512
func (x Float64x2) ConvertToUint32() Uint32x4

// ConvertToUint32 converts each element of x to uint32, returning the results as a Uint32x4.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint32, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPD2UDQY, CPU Feature: AVX512
func (x Float64x4) ConvertToUint32() Uint32x4

// ConvertToUint32 converts each element of x to uint32, returning the results as a Uint32x8.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint32, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPD2UDQ, CPU Feature: AVX512
func (x Float64x8) ConvertToUint32() Uint32x8

/* ConvertToUint64 */

// ConvertToUint64 converts each element of x to uint64, returning the results as a Uint64x4.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint64, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPS2UQQ, CPU Feature: AVX512
func (x Float32x4) ConvertToUint64() Uint64x4

// ConvertToUint64 converts each element of x to uint64, returning the results as a Uint64x8.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint64, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPS2UQQ, CPU Feature: AVX512
func (x Float32x8) ConvertToUint64() Uint64x8

// ConvertToUint64 converts each element of x to uint64, returning the results as a Uint64x2.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint64, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPD2UQQ, CPU Feature: AVX512
func (x Float64x2) ConvertToUint64() Uint64x2

// ConvertToUint64 converts each element of x to uint64, returning the results as a Uint64x4.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint64, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPD2UQQ, CPU Feature: AVX512
func (x Float64x4) ConvertToUint64() Uint64x4

// ConvertToUint64 converts each element of x to uint64, returning the results as a Uint64x8.
// An inexact conversion is truncated (rounded toward zero).
// If a converted value cannot be represented in uint64, an implementation-defined,
// architecture-specific value is returned for that element.
//
// Asm: VCVTTPD2UQQ, CPU Feature: AVX512
func (x Float64x8) ConvertToUint64() Uint64x8

/* CopySign */

// CopySign multiplies each element of x by -1, 0, or 1, using whichever of
// those constants is nearest to the corresponding element of y.
// Put differently, a result element is the negated x element, zero, or the
// unchanged x element when the y element is negative, zero, or positive.
//
// Asm: VPSIGNB, CPU Feature: AVX
func (x Int8x16) CopySign(y Int8x16) Int8x16

// CopySign multiplies each element of x by -1, 0, or 1, using whichever of
// those constants is nearest to the corresponding element of y.
// Put differently, a result element is the negated x element, zero, or the
// unchanged x element when the y element is negative, zero, or positive.
//
// Asm: VPSIGNB, CPU Feature: AVX2
func (x Int8x32) CopySign(y Int8x32) Int8x32

// CopySign multiplies each element of x by -1, 0, or 1, using whichever of
// those constants is nearest to the corresponding element of y.
// Put differently, a result element is the negated x element, zero, or the
// unchanged x element when the y element is negative, zero, or positive.
//
// Asm: VPSIGNW, CPU Feature: AVX
func (x Int16x8) CopySign(y Int16x8) Int16x8

// CopySign multiplies each element of x by -1, 0, or 1, using whichever of
// those constants is nearest to the corresponding element of y.
// Put differently, a result element is the negated x element, zero, or the
// unchanged x element when the y element is negative, zero, or positive.
//
// Asm: VPSIGNW, CPU Feature: AVX2
func (x Int16x16) CopySign(y Int16x16) Int16x16

// CopySign multiplies each element of x by -1, 0, or 1, using whichever of
// those constants is nearest to the corresponding element of y.
// Put differently, a result element is the negated x element, zero, or the
// unchanged x element when the y element is negative, zero, or positive.
//
// Asm: VPSIGND, CPU Feature: AVX
func (x Int32x4) CopySign(y Int32x4) Int32x4

// CopySign multiplies each element of x by -1, 0, or 1, using whichever of
// those constants is nearest to the corresponding element of y.
// Put differently, a result element is the negated x element, zero, or the
// unchanged x element when the y element is negative, zero, or positive.
//
// Asm: VPSIGND, CPU Feature: AVX2
func (x Int32x8) CopySign(y Int32x8) Int32x8

/* Div */

// Div divides each element of x by the corresponding element of y
// and returns the quotients as a Float32x4.
//
// Asm: VDIVPS, CPU Feature: AVX
func (x Float32x4) Div(y Float32x4) Float32x4

// Div divides each element of x by the corresponding element of y
// and returns the quotients as a Float32x8.
//
// Asm: VDIVPS, CPU Feature: AVX
func (x Float32x8) Div(y Float32x8) Float32x8

// Div divides each element of x by the corresponding element of y
// and returns the quotients as a Float32x16.
//
// Asm: VDIVPS, CPU Feature: AVX512
func (x Float32x16) Div(y Float32x16) Float32x16

// Div divides each element of x by the corresponding element of y
// and returns the quotients as a Float64x2.
//
// Asm: VDIVPD, CPU Feature: AVX
func (x Float64x2) Div(y Float64x2) Float64x2

// Div divides each element of x by the corresponding element of y
// and returns the quotients as a Float64x4.
//
// Asm: VDIVPD, CPU Feature: AVX
func (x Float64x4) Div(y Float64x4) Float64x4

// Div divides each element of x by the corresponding element of y
// and returns the quotients as a Float64x8.
//
// Asm: VDIVPD, CPU Feature: AVX512
func (x Float64x8) Div(y Float64x8) Float64x8

/* DotProductPairs */

// DotProductPairs multiplies the corresponding elements of x and y and adds the
// products together in pairs, yielding a vector of half as many elements with
// twice the input element size (Int16x8 operands produce an Int32x4).
//
// Asm: VPMADDWD, CPU Feature: AVX
func (x Int16x8) DotProductPairs(y Int16x8) Int32x4

// DotProductPairs multiplies the corresponding elements of x and y and adds the
// products together in pairs, yielding a vector of half as many elements with
// twice the input element size (Int16x16 operands produce an Int32x8).
//
// Asm: VPMADDWD, CPU Feature: AVX2
func (x Int16x16) DotProductPairs(y Int16x16) Int32x8

// DotProductPairs multiplies the corresponding elements of x and y and adds the
// products together in pairs, yielding a vector of half as many elements with
// twice the input element size (Int16x32 operands produce an Int32x16).
//
// Asm: VPMADDWD, CPU Feature: AVX512
func (x Int16x32) DotProductPairs(y Int16x32) Int32x16

/* DotProductPairsSaturated */

// DotProductPairsSaturated multiplies the corresponding elements of x and y and
// adds the products together in pairs with saturation, yielding a vector of
// half as many elements with twice the input element size.
// Note that the operand types differ: x is a Uint8x16 while y is an Int8x16.
//
// Asm: VPMADDUBSW, CPU Feature: AVX
func (x Uint8x16) DotProductPairsSaturated(y Int8x16) Int16x8

// DotProductPairsSaturated multiplies the corresponding elements of x and y and
// adds the products together in pairs with saturation, yielding a vector of
// half as many elements with twice the input element size.
// Note that the operand types differ: x is a Uint8x32 while y is an Int8x32.
//
// Asm: VPMADDUBSW, CPU Feature: AVX2
func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16

// DotProductPairsSaturated multiplies the corresponding elements of x and y and
// adds the products together in pairs with saturation, yielding a vector of
// half as many elements with twice the input element size.
// Note that the operand types differ: x is a Uint8x64 while y is an Int8x64.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512
func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32

/* DotProductQuadruple */

// DotProductQuadruple performs dot products on groups of 4 elements of x and y,
// returning the results as an Int32x4.
// Note that the operand types differ: x is an Int8x16 while y is a Uint8x16.
// The expression x.DotProductQuadruple(y).Add(z) will be optimized to the full
// form of the underlying instruction.
//
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4

// DotProductQuadruple performs dot products on groups of 4 elements of x and y,
// returning the results as an Int32x8.
// Note that the operand types differ: x is an Int8x32 while y is a Uint8x32.
// The expression x.DotProductQuadruple(y).Add(z) will be optimized to the full
// form of the underlying instruction.
//
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8

// DotProductQuadruple performs dot products on groups of 4 elements of x and y,
// returning the results as an Int32x16.
// Note that the operand types differ: x is an Int8x64 while y is a Uint8x64.
// The expression x.DotProductQuadruple(y).Add(z) will be optimized to the full
// form of the underlying instruction.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16

/* DotProductQuadrupleSaturated */

// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y,
// returning the results as an Int32x4.
// Note that the operand types differ: x is an Int8x16 while y is a Uint8x16.
// The expression x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the
// full form of the underlying instruction.
// NOTE(review): which step saturates (as the name indicates) is not stated here;
// confirm against VPDPBUSDS.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4

// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y,
// returning the results as an Int32x8.
// Note that the operand types differ: x is an Int8x32 while y is a Uint8x32.
// The expression x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the
// full form of the underlying instruction.
// NOTE(review): which step saturates (as the name indicates) is not stated here;
// confirm against VPDPBUSDS.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8

// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y,
// returning the results as an Int32x16.
// Note that the operand types differ: x is an Int8x64 while y is a Uint8x64.
// The expression x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the
// full form of the underlying instruction.
// NOTE(review): which step saturates (as the name indicates) is not stated here;
// confirm against VPDPBUSDS.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16

/* Equal */

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask8x16 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Int8x16) Equal(y Int8x16) Mask8x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask8x32 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Int8x32) Equal(y Int8x32) Mask8x32

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask8x64 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func (x Int8x64) Equal(y Int8x64) Mask8x64

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask16x8 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Int16x8) Equal(y Int16x8) Mask16x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask16x16 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Int16x16) Equal(y Int16x16) Mask16x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask16x32 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func (x Int16x32) Equal(y Int16x32) Mask16x32

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x4 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Int32x4) Equal(y Int32x4) Mask32x4

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x8 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Int32x8) Equal(y Int32x8) Mask32x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x16 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func (x Int32x16) Equal(y Int32x16) Mask32x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x2 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Int64x2) Equal(y Int64x2) Mask64x2

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x4 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Int64x4) Equal(y Int64x4) Mask64x4

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x8 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func (x Int64x8) Equal(y Int64x8) Mask64x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask8x16 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Uint8x16) Equal(y Uint8x16) Mask8x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask8x32 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Uint8x32) Equal(y Uint8x32) Mask8x32

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask8x64 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func (x Uint8x64) Equal(y Uint8x64) Mask8x64

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask16x8 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Uint16x8) Equal(y Uint16x8) Mask16x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask16x16 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Uint16x16) Equal(y Uint16x16) Mask16x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask16x32 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func (x Uint16x32) Equal(y Uint16x32) Mask16x32

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x4 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Uint32x4) Equal(y Uint32x4) Mask32x4

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x8 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Uint32x8) Equal(y Uint32x8) Mask32x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x16 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func (x Uint32x16) Equal(y Uint32x16) Mask32x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x2 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Uint64x2) Equal(y Uint64x2) Mask64x2

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x4 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Uint64x4) Equal(y Uint64x4) Mask64x4

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x8 reports whether the corresponding
// elements of x and y are equal.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func (x Uint64x8) Equal(y Uint64x8) Mask64x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x4 reports whether the corresponding
// elements of x and y are equal.
// NOTE(review): how NaN elements compare depends on the predicate passed to
// VCMPPS, which is not visible here; confirm.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Equal(y Float32x4) Mask32x4

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x8 reports whether the corresponding
// elements of x and y are equal.
// NOTE(review): how NaN elements compare depends on the predicate passed to
// VCMPPS, which is not visible here; confirm.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Equal(y Float32x8) Mask32x8

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask32x16 reports whether the corresponding
// elements of x and y are equal.
// NOTE(review): how NaN elements compare depends on the predicate passed to
// VCMPPS, which is not visible here; confirm.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Equal(y Float32x16) Mask32x16

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x2 reports whether the corresponding
// elements of x and y are equal.
// NOTE(review): how NaN elements compare depends on the predicate passed to
// VCMPPD, which is not visible here; confirm.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Equal(y Float64x2) Mask64x2

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x4 reports whether the corresponding
// elements of x and y are equal.
// NOTE(review): how NaN elements compare depends on the predicate passed to
// VCMPPD, which is not visible here; confirm.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Equal(y Float64x4) Mask64x4

// Equal compares x and y elementwise for equality.
// Each element of the returned Mask64x8 reports whether the corresponding
// elements of x and y are equal.
// NOTE(review): how NaN elements compare depends on the predicate passed to
// VCMPPD, which is not visible here; confirm.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Equal(y Float64x8) Mask64x8

/* Expand */

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VEXPANDPS, CPU Feature: AVX512
func (x Float32x4) Expand(mask Mask32x4) Float32x4

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VEXPANDPS, CPU Feature: AVX512
func (x Float32x8) Expand(mask Mask32x8) Float32x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VEXPANDPS, CPU Feature: AVX512
func (x Float32x16) Expand(mask Mask32x16) Float32x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VEXPANDPD, CPU Feature: AVX512
func (x Float64x2) Expand(mask Mask64x2) Float64x2

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VEXPANDPD, CPU Feature: AVX512
func (x Float64x4) Expand(mask Mask64x4) Float64x4

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VEXPANDPD, CPU Feature: AVX512
func (x Float64x8) Expand(mask Mask64x8) Float64x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Int8x16) Expand(mask Mask8x16) Int8x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Int8x32) Expand(mask Mask8x32) Int8x32

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Int8x64) Expand(mask Mask8x64) Int8x64

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Int16x8) Expand(mask Mask16x8) Int16x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Int16x16) Expand(mask Mask16x16) Int16x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Int16x32) Expand(mask Mask16x32) Int16x32

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func (x Int32x4) Expand(mask Mask32x4) Int32x4

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func (x Int32x8) Expand(mask Mask32x8) Int32x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func (x Int32x16) Expand(mask Mask32x16) Int32x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Int64x2) Expand(mask Mask64x2) Int64x2

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Int64x4) Expand(mask Mask64x4) Int64x4

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Int64x8) Expand(mask Mask64x8) Int64x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Uint8x16) Expand(mask Mask8x16) Uint8x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Uint8x32) Expand(mask Mask8x32) Uint8x32

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Uint8x64) Expand(mask Mask8x64) Uint8x64

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) Expand(mask Mask16x8) Uint16x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) Expand(mask Mask16x16) Uint16x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) Expand(mask Mask16x32) Uint16x32

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func (x Uint32x4) Expand(mask Mask32x4) Uint32x4

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func (x Uint32x8) Expand(mask Mask32x8) Uint32x8

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDD, CPU Feature: AVX512
func (x Uint32x16) Expand(mask Mask32x16) Uint32x16

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Uint64x2) Expand(mask Mask64x2) Uint64x2

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Uint64x4) Expand(mask Mask64x4) Uint64x4

// Expand distributes the elements packed into the low end of x to the
// positions selected by mask. The packed elements are consumed in order,
// from the lowest selected mask position to the highest.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Uint64x8) Expand(mask Mask64x8) Uint64x8

/* ExtendLo2ToInt64x2 */

// ExtendLo2ToInt64x2 sign-extends the 2 lowest elements of x to int64
// and returns them as an Int64x2.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX
func (x Int8x16) ExtendLo2ToInt64x2() Int64x2

// ExtendLo2ToInt64x2 sign-extends the 2 lowest elements of x to int64
// and returns them as an Int64x2.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX
func (x Int16x8) ExtendLo2ToInt64x2() Int64x2

// ExtendLo2ToInt64x2 sign-extends the 2 lowest elements of x to int64
// and returns them as an Int64x2.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX
func (x Int32x4) ExtendLo2ToInt64x2() Int64x2

/* ExtendLo2ToUint64x2 */

// ExtendLo2ToUint64x2 zero-extends the 2 lowest elements of x to uint64
// and returns them as a Uint64x2.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX
func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2

// ExtendLo2ToUint64x2 zero-extends the 2 lowest elements of x to uint64
// and returns them as a Uint64x2.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX
func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2

// ExtendLo2ToUint64x2 zero-extends the 2 lowest elements of x to uint64
// and returns them as a Uint64x2.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX
func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2

/* ExtendLo4ToInt32x4 */

// ExtendLo4ToInt32x4 sign-extends the 4 lowest elements of x to int32
// and returns them as an Int32x4.
//
// Asm: VPMOVSXBD, CPU Feature: AVX
func (x Int8x16) ExtendLo4ToInt32x4() Int32x4

// ExtendLo4ToInt32x4 sign-extends the 4 lowest elements of x to int32
// and returns them as an Int32x4.
//
// Asm: VPMOVSXWD, CPU Feature: AVX
func (x Int16x8) ExtendLo4ToInt32x4() Int32x4

/* ExtendLo4ToInt64x4 */

// ExtendLo4ToInt64x4 sign-extends the 4 lowest elements of x to int64
// and returns them as an Int64x4.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX2
func (x Int8x16) ExtendLo4ToInt64x4() Int64x4

// ExtendLo4ToInt64x4 sign-extends the 4 lowest elements of x to int64
// and returns them as an Int64x4.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX2
func (x Int16x8) ExtendLo4ToInt64x4() Int64x4

/* ExtendLo4ToUint32x4 */

// ExtendLo4ToUint32x4 zero-extends the 4 lowest elements of x to uint32
// and returns them as a Uint32x4.
//
// Asm: VPMOVZXBD, CPU Feature: AVX
func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4

// ExtendLo4ToUint32x4 zero-extends the 4 lowest elements of x to uint32
// and returns them as a Uint32x4.
//
// Asm: VPMOVZXWD, CPU Feature: AVX
func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4

/* ExtendLo4ToUint64x4 */

// ExtendLo4ToUint64x4 zero-extends the 4 lowest elements of x to uint64
// and returns them as a Uint64x4.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX2
func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4

// ExtendLo4ToUint64x4 zero-extends the 4 lowest elements of x to uint64
// and returns them as a Uint64x4.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX2
func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4

/* ExtendLo8ToInt16x8 */

// ExtendLo8ToInt16x8 sign-extends the 8 lowest elements of x to int16
// and returns them as an Int16x8.
//
// Asm: VPMOVSXBW, CPU Feature: AVX
func (x Int8x16) ExtendLo8ToInt16x8() Int16x8

/* ExtendLo8ToInt32x8 */

// ExtendLo8ToInt32x8 sign-extends the 8 lowest elements of x to int32
// and returns them as an Int32x8.
//
// Asm: VPMOVSXBD, CPU Feature: AVX2
func (x Int8x16) ExtendLo8ToInt32x8() Int32x8

/* ExtendLo8ToInt64x8 */

// ExtendLo8ToInt64x8 sign-extends the 8 lowest elements of x to int64
// and returns them as an Int64x8.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX512
func (x Int8x16) ExtendLo8ToInt64x8() Int64x8

/* ExtendLo8ToUint16x8 */

// ExtendLo8ToUint16x8 zero-extends the 8 lowest elements of x to uint16
// and returns them as a Uint16x8.
//
// Asm: VPMOVZXBW, CPU Feature: AVX
func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8

/* ExtendLo8ToUint32x8 */

// ExtendLo8ToUint32x8 zero-extends the 8 lowest elements of x to uint32
// and returns them as a Uint32x8.
//
// Asm: VPMOVZXBD, CPU Feature: AVX2
func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8

/* ExtendLo8ToUint64x8 */

// ExtendLo8ToUint64x8 zero-extends the 8 lowest elements of x to uint64
// and returns them as a Uint64x8.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX512
func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8

/* ExtendToInt16 */

// ExtendToInt16 sign-extends every element of x to int16
// and returns the results as an Int16x16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX2
func (x Int8x16) ExtendToInt16() Int16x16

// ExtendToInt16 sign-extends every element of x to int16
// and returns the results as an Int16x32.
//
// Asm: VPMOVSXBW, CPU Feature: AVX512
func (x Int8x32) ExtendToInt16() Int16x32

/* ExtendToInt32 */

// ExtendToInt32 sign-extends every element of x to int32
// and returns the results as an Int32x16.
//
// Asm: VPMOVSXBD, CPU Feature: AVX512
func (x Int8x16) ExtendToInt32() Int32x16

// ExtendToInt32 sign-extends every element of x to int32
// and returns the results as an Int32x8.
//
// Asm: VPMOVSXWD, CPU Feature: AVX2
func (x Int16x8) ExtendToInt32() Int32x8

// ExtendToInt32 sign-extends every element of x to int32
// and returns the results as an Int32x16.
//
// Asm: VPMOVSXWD, CPU Feature: AVX512
func (x Int16x16) ExtendToInt32() Int32x16

/* ExtendToInt64 */

// ExtendToInt64 sign-extends every element of x to int64
// and returns the results as an Int64x8.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX512
func (x Int16x8) ExtendToInt64() Int64x8

// ExtendToInt64 sign-extends every element of x to int64
// and returns the results as an Int64x4.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX2
func (x Int32x4) ExtendToInt64() Int64x4

// ExtendToInt64 sign-extends every element of x to int64
// and returns the results as an Int64x8.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX512
func (x Int32x8) ExtendToInt64() Int64x8

/* ExtendToUint16 */

// ExtendToUint16 zero-extends every element of x to uint16
// and returns the results as a Uint16x16.
//
// Asm: VPMOVZXBW, CPU Feature: AVX2
func (x Uint8x16) ExtendToUint16() Uint16x16

// ExtendToUint16 zero-extends every element of x to uint16
// and returns the results as a Uint16x32.
//
// Asm: VPMOVZXBW, CPU Feature: AVX512
func (x Uint8x32) ExtendToUint16() Uint16x32

/* ExtendToUint32 */

// ExtendToUint32 zero-extends every element of x to uint32
// and returns the results as a Uint32x16.
//
// Asm: VPMOVZXBD, CPU Feature: AVX512
func (x Uint8x16) ExtendToUint32() Uint32x16

// ExtendToUint32 zero-extends every element of x to uint32
// and returns the results as a Uint32x8.
//
// Asm: VPMOVZXWD, CPU Feature: AVX2
func (x Uint16x8) ExtendToUint32() Uint32x8

// ExtendToUint32 zero-extends every element of x to uint32
// and returns the results as a Uint32x16.
//
// Asm: VPMOVZXWD, CPU Feature: AVX512
func (x Uint16x16) ExtendToUint32() Uint32x16

/* ExtendToUint64 */

// ExtendToUint64 zero-extends every element of x to uint64
// and returns the results as a Uint64x8.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX512
func (x Uint16x8) ExtendToUint64() Uint64x8

// ExtendToUint64 zero-extends every element of x to uint64
// and returns the results as a Uint64x4.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX2
func (x Uint32x4) ExtendToUint64() Uint64x4

// ExtendToUint64 zero-extends every element of x to uint64
// and returns the results as a Uint64x8.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX512
func (x Uint32x8) ExtendToUint64() Uint64x8

/* Floor */

// Floor rounds each element of x down to the nearest integer
// and returns the results as a Float32x4.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) Floor() Float32x4

// Floor rounds each element of x down to the nearest integer
// and returns the results as a Float32x8.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) Floor() Float32x8

// Floor rounds each element of x down to the nearest integer
// and returns the results as a Float64x2.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) Floor() Float64x2

// Floor rounds each element of x down to the nearest integer
// and returns the results as a Float64x4.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Floor() Float64x4

/* FloorScaled */

// FloorScaled rounds each element of x down with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are
// kept; confirm against VRNDSCALEPS.
//
// prec results in better performance when it's a constant; a non-constant
// value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x4) FloorScaled(prec uint8) Float32x4

// FloorScaled rounds each element of x down with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are
// kept; confirm against VRNDSCALEPS.
//
// prec results in better performance when it's a constant; a non-constant
// value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x8) FloorScaled(prec uint8) Float32x8

// FloorScaled rounds each element of x down with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are
// kept; confirm against VRNDSCALEPS.
//
// prec results in better performance when it's a constant; a non-constant
// value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x16) FloorScaled(prec uint8) Float32x16

// FloorScaled rounds each element of x down with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are
// kept; confirm against VRNDSCALEPD.
//
// prec results in better performance when it's a constant; a non-constant
// value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x2) FloorScaled(prec uint8) Float64x2

// FloorScaled rounds each element of x down with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are
// kept; confirm against VRNDSCALEPD.
//
// prec results in better performance when it's a constant; a non-constant
// value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x4) FloorScaled(prec uint8) Float64x4

// FloorScaled rounds elements down with specified precision.
//
// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x8) FloorScaled(prec uint8) Float64x8

/* FloorScaledResidue */

// FloorScaledResidue computes, for each element of x, the difference left
// after flooring it with the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4

// FloorScaledResidue computes, for each element of x, the difference left
// after flooring it with the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8

// FloorScaledResidue computes, for each element of x, the difference left
// after flooring it with the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16

// FloorScaledResidue computes, for each element of x, the difference left
// after flooring it with the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2

// FloorScaledResidue computes, for each element of x, the difference left
// after flooring it with the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4

// FloorScaledResidue computes, for each element of x, the difference left
// after flooring it with the precision specified by prec.
//
// For best performance prec should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8

/* GaloisFieldAffineTransform */

// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// For best performance b should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16

// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// For best performance b should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32

// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// For best performance b should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64

/* GaloisFieldAffineTransformInverse */

// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1
// (in the polynomial, x is the indeterminate, not the receiver):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// For best performance b should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x16

// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1
// (in the polynomial, x is the indeterminate, not the receiver):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// For best performance b should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x32

// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1
// (in the polynomial, x is the indeterminate, not the receiver):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
//
// For best performance b should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64

/* GaloisFieldMul */

// GaloisFieldMul computes the element-wise GF(2^8) multiplication of x and y with
// reduction polynomial x^8 + x^4 + x^3 + x + 1
// (in the polynomial, x is the indeterminate, not the receiver).
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16

// GaloisFieldMul computes the element-wise GF(2^8) multiplication of x and y with
// reduction polynomial x^8 + x^4 + x^3 + x + 1
// (in the polynomial, x is the indeterminate, not the receiver).
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32

// GaloisFieldMul computes the element-wise GF(2^8) multiplication of x and y with
// reduction polynomial x^8 + x^4 + x^3 + x + 1
// (in the polynomial, x is the indeterminate, not the receiver).
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64

/* GetElem */

// GetElem retrieves the value of the single element of x selected by index,
// returned as a float32.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRD, CPU Feature: AVX
func (x Float32x4) GetElem(index uint8) float32

// GetElem retrieves the value of the single element of x selected by index,
// returned as a float64.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func (x Float64x2) GetElem(index uint8) float64

// GetElem retrieves the value of the single element of x selected by index,
// returned as an int8.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRB, CPU Feature: AVX512
func (x Int8x16) GetElem(index uint8) int8

// GetElem retrieves the value of the single element of x selected by index,
// returned as an int16.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRW, CPU Feature: AVX512
func (x Int16x8) GetElem(index uint8) int16

// GetElem retrieves the value of the single element of x selected by index,
// returned as an int32.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRD, CPU Feature: AVX
func (x Int32x4) GetElem(index uint8) int32

// GetElem retrieves the value of the single element of x selected by index,
// returned as an int64.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func (x Int64x2) GetElem(index uint8) int64

// GetElem retrieves the value of the single element of x selected by index,
// returned as a uint8.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRB, CPU Feature: AVX512
func (x Uint8x16) GetElem(index uint8) uint8

// GetElem retrieves the value of the single element of x selected by index,
// returned as a uint16.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRW, CPU Feature: AVX512
func (x Uint16x8) GetElem(index uint8) uint16

// GetElem retrieves the value of the single element of x selected by index,
// returned as a uint32.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRD, CPU Feature: AVX
func (x Uint32x4) GetElem(index uint8) uint32

// GetElem retrieves the value of the single element of x selected by index,
// returned as a uint64.
//
// For best performance index should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func (x Uint64x2) GetElem(index uint8) uint64

/* GetHi */

// GetHi returns the upper half of x as a Float32x4.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func (x Float32x8) GetHi() Float32x4

// GetHi returns the upper half of x as a Float32x8.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func (x Float32x16) GetHi() Float32x8

// GetHi returns the upper half of x as a Float64x2.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func (x Float64x4) GetHi() Float64x2

// GetHi returns the upper half of x as a Float64x4.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func (x Float64x8) GetHi() Float64x4

// GetHi returns the upper half of x as an Int8x16.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int8x32) GetHi() Int8x16

// GetHi returns the upper half of x as an Int8x32.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int8x64) GetHi() Int8x32

// GetHi returns the upper half of x as an Int16x8.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int16x16) GetHi() Int16x8

// GetHi returns the upper half of x as an Int16x16.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int16x32) GetHi() Int16x16

// GetHi returns the upper half of x as an Int32x4.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int32x8) GetHi() Int32x4

// GetHi returns the upper half of x as an Int32x8.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int32x16) GetHi() Int32x8

// GetHi returns the upper half of x as an Int64x2.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int64x4) GetHi() Int64x2

// GetHi returns the upper half of x as an Int64x4.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int64x8) GetHi() Int64x4

// GetHi returns the upper half of x as a Uint8x16.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint8x32) GetHi() Uint8x16

// GetHi returns the upper half of x as a Uint8x32.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint8x64) GetHi() Uint8x32

// GetHi returns the upper half of x as a Uint16x8.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint16x16) GetHi() Uint16x8

// GetHi returns the upper half of x as a Uint16x16.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint16x32) GetHi() Uint16x16

// GetHi returns the upper half of x as a Uint32x4.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint32x8) GetHi() Uint32x4

// GetHi returns the upper half of x as a Uint32x8.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint32x16) GetHi() Uint32x8

// GetHi returns the upper half of x as a Uint64x2.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint64x4) GetHi() Uint64x2

// GetHi returns the upper half of x as a Uint64x4.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint64x8) GetHi() Uint64x4

/* GetLo */

// GetLo returns the lower half of x as a Float32x4.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func (x Float32x8) GetLo() Float32x4

// GetLo returns the lower half of x as a Float32x8.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func (x Float32x16) GetLo() Float32x8

// GetLo returns the lower half of x as a Float64x2.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func (x Float64x4) GetLo() Float64x2

// GetLo returns the lower half of x as a Float64x4.
//
// Asm: VEXTRACTF64X4, CPU Feature: AVX512
func (x Float64x8) GetLo() Float64x4

// GetLo returns the lower half of x as an Int8x16.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int8x32) GetLo() Int8x16

// GetLo returns the lower half of x as an Int8x32.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int8x64) GetLo() Int8x32

// GetLo returns the lower half of x as an Int16x8.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int16x16) GetLo() Int16x8

// GetLo returns the lower half of x as an Int16x16.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int16x32) GetLo() Int16x16

// GetLo returns the lower half of x as an Int32x4.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int32x8) GetLo() Int32x4

// GetLo returns the lower half of x as an Int32x8.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int32x16) GetLo() Int32x8

// GetLo returns the lower half of x as an Int64x2.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int64x4) GetLo() Int64x2

// GetLo returns the lower half of x as an Int64x4.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Int64x8) GetLo() Int64x4

// GetLo returns the lower half of x as a Uint8x16.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint8x32) GetLo() Uint8x16

// GetLo returns the lower half of x as a Uint8x32.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint8x64) GetLo() Uint8x32

// GetLo returns the lower half of x as a Uint16x8.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint16x16) GetLo() Uint16x8

// GetLo returns the lower half of x as a Uint16x16.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint16x32) GetLo() Uint16x16

// GetLo returns the lower half of x as a Uint32x4.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint32x8) GetLo() Uint32x4

// GetLo returns the lower half of x as a Uint32x8.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint32x16) GetLo() Uint32x8

// GetLo returns the lower half of x as a Uint64x2.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint64x4) GetLo() Uint64x2

// GetLo returns the lower half of x as a Uint64x4.
//
// Asm: VEXTRACTI64X4, CPU Feature: AVX512
func (x Uint64x8) GetLo() Uint64x4

/* Greater */

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTB, CPU Feature: AVX
func (x Int8x16) Greater(y Int8x16) Mask8x16

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTB, CPU Feature: AVX2
func (x Int8x32) Greater(y Int8x32) Mask8x32

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTB, CPU Feature: AVX512
func (x Int8x64) Greater(y Int8x64) Mask8x64

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTW, CPU Feature: AVX
func (x Int16x8) Greater(y Int16x8) Mask16x8

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTW, CPU Feature: AVX2
func (x Int16x16) Greater(y Int16x16) Mask16x16

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTW, CPU Feature: AVX512
func (x Int16x32) Greater(y Int16x32) Mask16x32

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTD, CPU Feature: AVX
func (x Int32x4) Greater(y Int32x4) Mask32x4

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTD, CPU Feature: AVX2
func (x Int32x8) Greater(y Int32x8) Mask32x8

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTD, CPU Feature: AVX512
func (x Int32x16) Greater(y Int32x16) Mask32x16

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTQ, CPU Feature: AVX
func (x Int64x2) Greater(y Int64x2) Mask64x2

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTQ, CPU Feature: AVX2
func (x Int64x4) Greater(y Int64x4) Mask64x4

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPGTQ, CPU Feature: AVX512
func (x Int64x8) Greater(y Int64x8) Mask64x8

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Greater(y Float32x4) Mask32x4

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Greater(y Float32x8) Mask32x8

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Greater(y Float32x16) Mask32x16

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Greater(y Float64x2) Mask64x2

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Greater(y Float64x4) Mask64x4

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Greater(y Float64x8) Mask64x8

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Greater(y Uint8x64) Mask8x64

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Greater(y Uint16x32) Mask16x32

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Greater(y Uint32x16) Mask32x16

// Greater compares x and y elementwise and returns the mask of
// "x greater-than y" results.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) Greater(y Uint64x8) Mask64x8

/* GreaterEqual */

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16

// GreaterEqual compares x and y elementwise and returns the mask of
// "x greater-than-or-equal-to y" results.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8

/* InterleaveHi */

// InterleaveHi interleaves the elements of the high halves of x and y
// into a single Int16x8 result.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX
func (x Int16x8) InterleaveHi(y Int16x8) Int16x8

// InterleaveHi interleaves the elements of the high halves of x and y
// into a single Int32x4 result.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX
func (x Int32x4) InterleaveHi(y Int32x4) Int32x4

// InterleaveHi interleaves the elements of the high halves of x and y
// into a single Int64x2 result.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX
func (x Int64x2) InterleaveHi(y Int64x2) Int64x2

// InterleaveHi interleaves the elements of the high halves of x and y
// into a single Uint16x8 result.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX
func (x Uint16x8) InterleaveHi(y Uint16x8) Uint16x8

// InterleaveHi interleaves the elements of the high halves of x and y
// into a single Uint32x4 result.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX
func (x Uint32x4) InterleaveHi(y Uint32x4) Uint32x4

// InterleaveHi interleaves the elements of the high halves of x and y
// into a single Uint64x2 result.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX
func (x Uint64x2) InterleaveHi(y Uint64x2) Uint64x2

/* InterleaveHiGrouped */

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX2
func (x Int16x16) InterleaveHiGrouped(y Int16x16) Int16x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX512
func (x Int16x32) InterleaveHiGrouped(y Int16x32) Int16x32

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX2
func (x Int32x8) InterleaveHiGrouped(y Int32x8) Int32x8

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX512
func (x Int32x16) InterleaveHiGrouped(y Int32x16) Int32x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX2
func (x Int64x4) InterleaveHiGrouped(y Int64x4) Int64x4

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX512
func (x Int64x8) InterleaveHiGrouped(y Int64x8) Int64x8

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX2
func (x Uint16x16) InterleaveHiGrouped(y Uint16x16) Uint16x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHWD, CPU Feature: AVX512
func (x Uint16x32) InterleaveHiGrouped(y Uint16x32) Uint16x32

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX2
func (x Uint32x8) InterleaveHiGrouped(y Uint32x8) Uint32x8

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHDQ, CPU Feature: AVX512
func (x Uint32x16) InterleaveHiGrouped(y Uint32x16) Uint32x16

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX2
func (x Uint64x4) InterleaveHiGrouped(y Uint64x4) Uint64x4

// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKHQDQ, CPU Feature: AVX512
func (x Uint64x8) InterleaveHiGrouped(y Uint64x8) Uint64x8

/* InterleaveLo */

// InterleaveLo interleaves the elements of the low halves of x and y
// into a single Int16x8 result.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX
func (x Int16x8) InterleaveLo(y Int16x8) Int16x8

// InterleaveLo interleaves the elements of the low halves of x and y
// into a single Int32x4 result.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX
func (x Int32x4) InterleaveLo(y Int32x4) Int32x4

// InterleaveLo interleaves the elements of the low halves of x and y
// into a single Int64x2 result.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX
func (x Int64x2) InterleaveLo(y Int64x2) Int64x2

// InterleaveLo interleaves the elements of the low halves of x and y
// into a single Uint16x8 result.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX
func (x Uint16x8) InterleaveLo(y Uint16x8) Uint16x8

// InterleaveLo interleaves the elements of the low halves of x and y
// into a single Uint32x4 result.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX
func (x Uint32x4) InterleaveLo(y Uint32x4) Uint32x4

// InterleaveLo interleaves the elements of the low halves of x and y
// into a single Uint64x2 result.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX
func (x Uint64x2) InterleaveLo(y Uint64x2) Uint64x2

/* InterleaveLoGrouped */

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX2
func (x Int16x16) InterleaveLoGrouped(y Int16x16) Int16x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX512
func (x Int16x32) InterleaveLoGrouped(y Int16x32) Int16x32

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX2
func (x Int32x8) InterleaveLoGrouped(y Int32x8) Int32x8

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX512
func (x Int32x16) InterleaveLoGrouped(y Int32x16) Int32x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX2
func (x Int64x4) InterleaveLoGrouped(y Int64x4) Int64x4

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
func (x Int64x8) InterleaveLoGrouped(y Int64x8) Int64x8

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX2
func (x Uint16x16) InterleaveLoGrouped(y Uint16x16) Uint16x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLWD, CPU Feature: AVX512
func (x Uint16x32) InterleaveLoGrouped(y Uint16x32) Uint16x32

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX2
func (x Uint32x8) InterleaveLoGrouped(y Uint32x8) Uint32x8

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLDQ, CPU Feature: AVX512
func (x Uint32x16) InterleaveLoGrouped(y Uint32x16) Uint32x16

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX2
func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4

// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
// Each 128-bit group is processed separately.
//
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8

/* IsNan */

// IsNan checks if elements are NaN and returns the result as a mask.
// It is meant to be called with the receiver as its own argument: x.IsNan(x).
// NOTE(review): the effect of passing a y other than x is not described here;
// presumably NaN elements of y are flagged as well - confirm.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) IsNan(y Float32x4) Mask32x4

// IsNan checks if elements are NaN and returns the result as a mask.
// It is meant to be called with the receiver as its own argument: x.IsNan(x).
// NOTE(review): the effect of passing a y other than x is not described here;
// presumably NaN elements of y are flagged as well - confirm.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) IsNan(y Float32x8) Mask32x8

// IsNan checks if elements are NaN and returns the result as a mask.
// It is meant to be called with the receiver as its own argument: x.IsNan(x).
// NOTE(review): the effect of passing a y other than x is not described here;
// presumably NaN elements of y are flagged as well - confirm.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) IsNan(y Float32x16) Mask32x16

// IsNan checks if elements are NaN and returns the result as a mask.
// It is meant to be called with the receiver as its own argument: x.IsNan(x).
// NOTE(review): the effect of passing a y other than x is not described here;
// presumably NaN elements of y are flagged as well - confirm.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) IsNan(y Float64x2) Mask64x2

// IsNan checks if elements are NaN and returns the result as a mask.
// It is meant to be called with the receiver as its own argument: x.IsNan(x).
// NOTE(review): the effect of passing a y other than x is not described here;
// presumably NaN elements of y are flagged as well - confirm.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) IsNan(y Float64x4) Mask64x4

// IsNan checks if elements are NaN and returns the result as a mask.
// It is meant to be called with the receiver as its own argument: x.IsNan(x).
// NOTE(review): the effect of passing a y other than x is not described here;
// presumably NaN elements of y are flagged as well - confirm.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) IsNan(y Float64x8) Mask64x8

/* LeadingZeros */

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in an Int32x4.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func (x Int32x4) LeadingZeros() Int32x4

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in an Int32x8.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func (x Int32x8) LeadingZeros() Int32x8

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in an Int32x16.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func (x Int32x16) LeadingZeros() Int32x16

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in an Int64x2.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func (x Int64x2) LeadingZeros() Int64x2

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in an Int64x4.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func (x Int64x4) LeadingZeros() Int64x4

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in an Int64x8.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func (x Int64x8) LeadingZeros() Int64x8

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in a Uint32x4.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func (x Uint32x4) LeadingZeros() Uint32x4

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in a Uint32x8.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func (x Uint32x8) LeadingZeros() Uint32x8

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in a Uint32x16.
//
// Asm: VPLZCNTD, CPU Feature: AVX512
func (x Uint32x16) LeadingZeros() Uint32x16

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in a Uint64x2.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func (x Uint64x2) LeadingZeros() Uint64x2

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in a Uint64x4.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func (x Uint64x4) LeadingZeros() Uint64x4

// LeadingZeros counts the leading zeros of each element in x.
// The counts are returned elementwise in a Uint64x8.
//
// Asm: VPLZCNTQ, CPU Feature: AVX512
func (x Uint64x8) LeadingZeros() Uint64x8

/* Less */

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Less(y Float32x4) Mask32x4

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Less(y Float32x8) Mask32x8

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Less(y Float32x16) Mask32x16

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Less(y Float64x2) Mask64x2

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Less(y Float64x4) Mask64x4

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Less(y Float64x8) Mask64x8

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) Less(y Int8x64) Mask8x64

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) Less(y Int16x32) Mask16x32

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) Less(y Int32x16) Mask32x16

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) Less(y Int64x8) Mask64x8

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Less(y Uint8x64) Mask8x64

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Less(y Uint16x32) Mask16x32

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Less(y Uint32x16) Mask32x16

// Less compares x and y elementwise and returns the mask of
// "x less-than y" results.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) Less(y Uint64x8) Mask64x8

/* LessEqual */

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) LessEqual(y Float32x4) Mask32x4

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) LessEqual(y Float32x8) Mask32x8

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) LessEqual(y Float32x16) Mask32x16

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) LessEqual(y Float64x2) Mask64x2

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) LessEqual(y Float64x4) Mask64x4

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) LessEqual(y Float64x8) Mask64x8

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) LessEqual(y Int8x64) Mask8x64

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) LessEqual(y Int16x32) Mask16x32

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) LessEqual(y Int32x16) Mask32x16

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) LessEqual(y Int64x8) Mask64x8

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16

// LessEqual compares x and y elementwise and returns the mask of
// "x less-than-or-equal-to y" results.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8

/* Max */

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VMAXPS, CPU Feature: AVX
func (x Float32x4) Max(y Float32x4) Float32x4

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VMAXPS, CPU Feature: AVX
func (x Float32x8) Max(y Float32x8) Float32x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VMAXPS, CPU Feature: AVX512
func (x Float32x16) Max(y Float32x16) Float32x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VMAXPD, CPU Feature: AVX
func (x Float64x2) Max(y Float64x2) Float64x2

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VMAXPD, CPU Feature: AVX
func (x Float64x4) Max(y Float64x4) Float64x4

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VMAXPD, CPU Feature: AVX512
func (x Float64x8) Max(y Float64x8) Float64x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSB, CPU Feature: AVX
func (x Int8x16) Max(y Int8x16) Int8x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSB, CPU Feature: AVX2
func (x Int8x32) Max(y Int8x32) Int8x32

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSB, CPU Feature: AVX512
func (x Int8x64) Max(y Int8x64) Int8x64

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSW, CPU Feature: AVX
func (x Int16x8) Max(y Int16x8) Int16x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSW, CPU Feature: AVX2
func (x Int16x16) Max(y Int16x16) Int16x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSW, CPU Feature: AVX512
func (x Int16x32) Max(y Int16x32) Int16x32

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSD, CPU Feature: AVX
func (x Int32x4) Max(y Int32x4) Int32x4

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSD, CPU Feature: AVX2
func (x Int32x8) Max(y Int32x8) Int32x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSD, CPU Feature: AVX512
func (x Int32x16) Max(y Int32x16) Int32x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func (x Int64x2) Max(y Int64x2) Int64x2

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func (x Int64x4) Max(y Int64x4) Int64x4

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func (x Int64x8) Max(y Int64x8) Int64x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUB, CPU Feature: AVX
func (x Uint8x16) Max(y Uint8x16) Uint8x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUB, CPU Feature: AVX2
func (x Uint8x32) Max(y Uint8x32) Uint8x32

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUB, CPU Feature: AVX512
func (x Uint8x64) Max(y Uint8x64) Uint8x64

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUW, CPU Feature: AVX
func (x Uint16x8) Max(y Uint16x8) Uint16x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUW, CPU Feature: AVX2
func (x Uint16x16) Max(y Uint16x16) Uint16x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUW, CPU Feature: AVX512
func (x Uint16x32) Max(y Uint16x32) Uint16x32

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUD, CPU Feature: AVX
func (x Uint32x4) Max(y Uint32x4) Uint32x4

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUD, CPU Feature: AVX2
func (x Uint32x8) Max(y Uint32x8) Uint32x8

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUD, CPU Feature: AVX512
func (x Uint32x16) Max(y Uint32x16) Uint32x16

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func (x Uint64x2) Max(y Uint64x2) Uint64x2

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func (x Uint64x4) Max(y Uint64x4) Uint64x4

// Max returns a vector whose element i is the larger of x[i] and y[i].
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func (x Uint64x8) Max(y Uint64x8) Uint64x8

/* Min */

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VMINPS, CPU Feature: AVX
func (x Float32x4) Min(y Float32x4) Float32x4

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VMINPS, CPU Feature: AVX
func (x Float32x8) Min(y Float32x8) Float32x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VMINPS, CPU Feature: AVX512
func (x Float32x16) Min(y Float32x16) Float32x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VMINPD, CPU Feature: AVX
func (x Float64x2) Min(y Float64x2) Float64x2

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VMINPD, CPU Feature: AVX
func (x Float64x4) Min(y Float64x4) Float64x4

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VMINPD, CPU Feature: AVX512
func (x Float64x8) Min(y Float64x8) Float64x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSB, CPU Feature: AVX
func (x Int8x16) Min(y Int8x16) Int8x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSB, CPU Feature: AVX2
func (x Int8x32) Min(y Int8x32) Int8x32

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSB, CPU Feature: AVX512
func (x Int8x64) Min(y Int8x64) Int8x64

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSW, CPU Feature: AVX
func (x Int16x8) Min(y Int16x8) Int16x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSW, CPU Feature: AVX2
func (x Int16x16) Min(y Int16x16) Int16x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSW, CPU Feature: AVX512
func (x Int16x32) Min(y Int16x32) Int16x32

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSD, CPU Feature: AVX
func (x Int32x4) Min(y Int32x4) Int32x4

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSD, CPU Feature: AVX2
func (x Int32x8) Min(y Int32x8) Int32x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSD, CPU Feature: AVX512
func (x Int32x16) Min(y Int32x16) Int32x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSQ, CPU Feature: AVX512
func (x Int64x2) Min(y Int64x2) Int64x2

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSQ, CPU Feature: AVX512
func (x Int64x4) Min(y Int64x4) Int64x4

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINSQ, CPU Feature: AVX512
func (x Int64x8) Min(y Int64x8) Int64x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUB, CPU Feature: AVX
func (x Uint8x16) Min(y Uint8x16) Uint8x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUB, CPU Feature: AVX2
func (x Uint8x32) Min(y Uint8x32) Uint8x32

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUB, CPU Feature: AVX512
func (x Uint8x64) Min(y Uint8x64) Uint8x64

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUW, CPU Feature: AVX
func (x Uint16x8) Min(y Uint16x8) Uint16x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUW, CPU Feature: AVX2
func (x Uint16x16) Min(y Uint16x16) Uint16x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUW, CPU Feature: AVX512
func (x Uint16x32) Min(y Uint16x32) Uint16x32

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUD, CPU Feature: AVX
func (x Uint32x4) Min(y Uint32x4) Uint32x4

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUD, CPU Feature: AVX2
func (x Uint32x8) Min(y Uint32x8) Uint32x8

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUD, CPU Feature: AVX512
func (x Uint32x16) Min(y Uint32x16) Uint32x16

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUQ, CPU Feature: AVX512
func (x Uint64x2) Min(y Uint64x2) Uint64x2

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUQ, CPU Feature: AVX512
func (x Uint64x4) Min(y Uint64x4) Uint64x4

// Min returns a vector whose element i is the smaller of x[i] and y[i].
//
// Asm: VPMINUQ, CPU Feature: AVX512
func (x Uint64x8) Min(y Uint64x8) Uint64x8

/* Mul */

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VMULPS, CPU Feature: AVX
func (x Float32x4) Mul(y Float32x4) Float32x4

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VMULPS, CPU Feature: AVX
func (x Float32x8) Mul(y Float32x8) Float32x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VMULPS, CPU Feature: AVX512
func (x Float32x16) Mul(y Float32x16) Float32x16

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VMULPD, CPU Feature: AVX
func (x Float64x2) Mul(y Float64x2) Float64x2

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VMULPD, CPU Feature: AVX
func (x Float64x4) Mul(y Float64x4) Float64x4

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VMULPD, CPU Feature: AVX512
func (x Float64x8) Mul(y Float64x8) Float64x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLW, CPU Feature: AVX
func (x Int16x8) Mul(y Int16x8) Int16x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLW, CPU Feature: AVX2
func (x Int16x16) Mul(y Int16x16) Int16x16

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLW, CPU Feature: AVX512
func (x Int16x32) Mul(y Int16x32) Int16x32

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLD, CPU Feature: AVX
func (x Int32x4) Mul(y Int32x4) Int32x4

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLD, CPU Feature: AVX2
func (x Int32x8) Mul(y Int32x8) Int32x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLD, CPU Feature: AVX512
func (x Int32x16) Mul(y Int32x16) Int32x16

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLQ, CPU Feature: AVX512
func (x Int64x2) Mul(y Int64x2) Int64x2

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLQ, CPU Feature: AVX512
func (x Int64x4) Mul(y Int64x4) Int64x4

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLQ, CPU Feature: AVX512
func (x Int64x8) Mul(y Int64x8) Int64x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLW, CPU Feature: AVX
func (x Uint16x8) Mul(y Uint16x8) Uint16x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLW, CPU Feature: AVX2
func (x Uint16x16) Mul(y Uint16x16) Uint16x16

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLW, CPU Feature: AVX512
func (x Uint16x32) Mul(y Uint16x32) Uint16x32

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLD, CPU Feature: AVX
func (x Uint32x4) Mul(y Uint32x4) Uint32x4

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLD, CPU Feature: AVX2
func (x Uint32x8) Mul(y Uint32x8) Uint32x8

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLD, CPU Feature: AVX512
func (x Uint32x16) Mul(y Uint32x16) Uint32x16

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLQ, CPU Feature: AVX512
func (x Uint64x2) Mul(y Uint64x2) Uint64x2

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLQ, CPU Feature: AVX512
func (x Uint64x4) Mul(y Uint64x4) Uint64x4

// Mul returns a vector whose element i is the product x[i] * y[i].
//
// Asm: VPMULLQ, CPU Feature: AVX512
func (x Uint64x8) Mul(y Uint64x8) Uint64x8

/* MulAdd */

// MulAdd returns a vector whose element i is the fused multiply-add (x[i] * y[i]) + z[i].
//
// Asm: VFMADD213PS, CPU Feature: AVX512
func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4

// MulAdd returns a vector whose element i is the fused multiply-add (x[i] * y[i]) + z[i].
//
// Asm: VFMADD213PS, CPU Feature: AVX512
func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8

// MulAdd returns a vector whose element i is the fused multiply-add (x[i] * y[i]) + z[i].
//
// Asm: VFMADD213PS, CPU Feature: AVX512
func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16

// MulAdd returns a vector whose element i is the fused multiply-add (x[i] * y[i]) + z[i].
//
// Asm: VFMADD213PD, CPU Feature: AVX512
func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2

// MulAdd returns a vector whose element i is the fused multiply-add (x[i] * y[i]) + z[i].
//
// Asm: VFMADD213PD, CPU Feature: AVX512
func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4

// MulAdd returns a vector whose element i is the fused multiply-add (x[i] * y[i]) + z[i].
//
// Asm: VFMADD213PD, CPU Feature: AVX512
func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8

/* MulAddSub */

// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// NOTE(review): Intel's description of VFMADDSUB adds on odd-indexed elements and subtracts on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4

// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// NOTE(review): Intel's description of VFMADDSUB adds on odd-indexed elements and subtracts on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8

// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// NOTE(review): Intel's description of VFMADDSUB adds on odd-indexed elements and subtracts on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16

// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// NOTE(review): Intel's description of VFMADDSUB adds on odd-indexed elements and subtracts on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2

// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// NOTE(review): Intel's description of VFMADDSUB adds on odd-indexed elements and subtracts on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4

// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// NOTE(review): Intel's description of VFMADDSUB adds on odd-indexed elements and subtracts on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8

/* MulEvenWiden */

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each product
// to twice the element width.
// result[i] = x[2*i] * y[2*i].
//
// Asm: VPMULDQ, CPU Feature: AVX
func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each product
// to twice the element width.
// result[i] = x[2*i] * y[2*i].
//
// Asm: VPMULDQ, CPU Feature: AVX2
func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each product
// to twice the element width.
// result[i] = x[2*i] * y[2*i].
//
// Asm: VPMULUDQ, CPU Feature: AVX
func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each product
// to twice the element width.
// result[i] = x[2*i] * y[2*i].
//
// Asm: VPMULUDQ, CPU Feature: AVX2
func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4

/* MulHigh */

// MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
//
// Asm: VPMULHW, CPU Feature: AVX
func (x Int16x8) MulHigh(y Int16x8) Int16x8

// MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
//
// Asm: VPMULHW, CPU Feature: AVX2
func (x Int16x16) MulHigh(y Int16x16) Int16x16

// MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
//
// Asm: VPMULHW, CPU Feature: AVX512
func (x Int16x32) MulHigh(y Int16x32) Int16x32

// MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
//
// Asm: VPMULHUW, CPU Feature: AVX
func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8

// MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
//
// Asm: VPMULHUW, CPU Feature: AVX2
func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16

// MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
//
// Asm: VPMULHUW, CPU Feature: AVX512
func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32

/* MulSubAdd */

// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// NOTE(review): Intel's description of VFMSUBADD subtracts on odd-indexed elements and adds on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4

// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// NOTE(review): Intel's description of VFMSUBADD subtracts on odd-indexed elements and adds on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8

// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// NOTE(review): Intel's description of VFMSUBADD subtracts on odd-indexed elements and adds on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16

// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// NOTE(review): Intel's description of VFMSUBADD subtracts on odd-indexed elements and adds on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2

// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// NOTE(review): Intel's description of VFMSUBADD subtracts on odd-indexed elements and adds on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4

// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// NOTE(review): Intel's description of VFMSUBADD subtracts on odd-indexed elements and adds on
// even-indexed ones; confirm which convention this sentence intends.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8

/* NotEqual */

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) NotEqual(y Float32x4) Mask32x4

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) NotEqual(y Float32x8) Mask32x8

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) NotEqual(y Float32x16) Mask32x16

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) NotEqual(y Float64x2) Mask64x2

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) NotEqual(y Float64x4) Mask64x4

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) NotEqual(y Float64x8) Mask64x8

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) NotEqual(y Int8x64) Mask8x64

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) NotEqual(y Int16x32) Mask16x32

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) NotEqual(y Int32x16) Mask32x16

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) NotEqual(y Int64x8) Mask64x8

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16

// NotEqual returns a mask whose element i reports whether x[i] != y[i].
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8

/* OnesCount */

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x16) OnesCount() Int8x16

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x32) OnesCount() Int8x32

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x64) OnesCount() Int8x64

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x8) OnesCount() Int16x8

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x16) OnesCount() Int16x16

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x32) OnesCount() Int16x32

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x4) OnesCount() Int32x4

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x8) OnesCount() Int32x8

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x16) OnesCount() Int32x16

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x2) OnesCount() Int64x2

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x4) OnesCount() Int64x4

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x8) OnesCount() Int64x8

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x16) OnesCount() Uint8x16

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x32) OnesCount() Uint8x32

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x64) OnesCount() Uint8x64

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x8) OnesCount() Uint16x8

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x16) OnesCount() Uint16x16

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x32) OnesCount() Uint16x32

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x4) OnesCount() Uint32x4

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x8) OnesCount() Uint32x8

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x16) OnesCount() Uint32x16

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x2) OnesCount() Uint64x2

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x4) OnesCount() Uint64x4

// OnesCount returns a vector whose element i is the number of set bits in x[i].
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x8) OnesCount() Uint64x8

/* Or */

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int8x16) Or(y Int8x16) Int8x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int8x32) Or(y Int8x32) Int8x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Int8x64) Or(y Int8x64) Int8x64

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int16x8) Or(y Int16x8) Int16x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int16x16) Or(y Int16x16) Int16x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Int16x32) Or(y Int16x32) Int16x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int32x4) Or(y Int32x4) Int32x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int32x8) Or(y Int32x8) Int32x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Int32x16) Or(y Int32x16) Int32x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int64x2) Or(y Int64x2) Int64x2

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int64x4) Or(y Int64x4) Int64x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPORQ, CPU Feature: AVX512
func (x Int64x8) Or(y Int64x8) Int64x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint8x16) Or(y Uint8x16) Uint8x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint8x32) Or(y Uint8x32) Uint8x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Uint8x64) Or(y Uint8x64) Uint8x64

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint16x8) Or(y Uint16x8) Uint16x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint16x16) Or(y Uint16x16) Uint16x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Uint16x32) Or(y Uint16x32) Uint16x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint32x4) Or(y Uint32x4) Uint32x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint32x8) Or(y Uint32x8) Uint32x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Uint32x16) Or(y Uint32x16) Uint32x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint64x2) Or(y Uint64x2) Uint64x2

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint64x4) Or(y Uint64x4) Uint64x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPORQ, CPU Feature: AVX512
func (x Uint64x8) Or(y Uint64x8) Uint64x8

/* Permute */

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 4 bits (values 0-15) of each element of indices is used
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x16) Permute(indices Uint8x16) Int8x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 4 bits (values 0-15) of each element of indices is used
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x16) Permute(indices Uint8x16) Uint8x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 5 bits (values 0-31) of each element of indices is used
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x32) Permute(indices Uint8x32) Int8x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 5 bits (values 0-31) of each element of indices is used
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x32) Permute(indices Uint8x32) Uint8x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 6 bits (values 0-63) of each element of indices is used
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x64) Permute(indices Uint8x64) Int8x64

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 6 bits (values 0-63) of each element of indices is used
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x64) Permute(indices Uint8x64) Uint8x64

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 3 bits (values 0-7) of each element of indices is used
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x8) Permute(indices Uint16x8) Int16x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 3 bits (values 0-7) of each element of indices is used
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x8) Permute(indices Uint16x8) Uint16x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 4 bits (values 0-15) of each element of indices is used
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x16) Permute(indices Uint16x16) Int16x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 4 bits (values 0-15) of each element of indices is used
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x16) Permute(indices Uint16x16) Uint16x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 5 bits (values 0-31) of each element of indices is used
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x32) Permute(indices Uint16x32) Int16x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 5 bits (values 0-31) of each element of indices is used
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x32) Permute(indices Uint16x32) Uint16x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 3 bits (values 0-7) of each element of indices is used
//
// Asm: VPERMPS, CPU Feature: AVX2
func (x Float32x8) Permute(indices Uint32x8) Float32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 3 bits (values 0-7) of each element of indices is used
//
// Asm: VPERMD, CPU Feature: AVX2
func (x Int32x8) Permute(indices Uint32x8) Int32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 3 bits (values 0-7) of each element of indices is used
//
// Asm: VPERMD, CPU Feature: AVX2
func (x Uint32x8) Permute(indices Uint32x8) Uint32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 4 bits (values 0-15) of each element of indices is used
//
// Asm: VPERMPS, CPU Feature: AVX512
func (x Float32x16) Permute(indices Uint32x16) Float32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// The low 4 bits (values 0-15) of each element of indices is used
//
// Asm: VPERMD, CPU Feature: AVX512
func (x Int32x16) Permute(indices Uint32x16) Int32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX512
func (x Uint32x16) Permute(indices Uint32x16) Uint32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func (x Float64x4) Permute(indices Uint64x4) Float64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Int64x4) Permute(indices Uint64x4) Int64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Uint64x4) Permute(indices Uint64x4) Uint64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func (x Float64x8) Permute(indices Uint64x8) Float64x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Int64x8) Permute(indices Uint64x8) Int64x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Uint64x8) Permute(indices Uint64x8) Uint64x8

/* PermuteOrZero */

// PermuteOrZero performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set, in which case zero is used instead.
//
// Asm: VPSHUFB, CPU Feature: AVX
func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16

// PermuteOrZero performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}, where n is the number of elements in x.
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set, in which case zero is used instead.
//
// Asm: VPSHUFB, CPU Feature: AVX
func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16

/* PermuteOrZeroGrouped */

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits (16 byte-sized elements) wide.
//
// Asm: VPSHUFB, CPU Feature: AVX2
func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits (16 byte-sized elements) wide.
//
// Asm: VPSHUFB, CPU Feature: AVX512
func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits (16 byte-sized elements) wide.
//
// Asm: VPSHUFB, CPU Feature: AVX2
func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits (16 byte-sized elements) wide.
//
// Asm: VPSHUFB, CPU Feature: AVX512
func (x Uint8x64) PermuteOrZeroGrouped(indices Int8x64) Uint8x64

/* Reciprocal */

// Reciprocal computes an approximate reciprocal (1/x) of each element of x.
// This width is implemented with VRCPPS, while Float32x16.Reciprocal uses VRCP14PS,
// so the approximation may differ between vector widths.
//
// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x4) Reciprocal() Float32x4

// Reciprocal computes an approximate reciprocal (1/x) of each element of x.
// This width is implemented with VRCPPS, while Float32x16.Reciprocal uses VRCP14PS,
// so the approximation may differ between vector widths.
//
// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x8) Reciprocal() Float32x8

// Reciprocal computes an approximate reciprocal (1/x) of each element of x.
// This width is implemented with VRCP14PS, while the Float32x4 and Float32x8 variants use VRCPPS,
// so the approximation may differ between vector widths.
//
// Asm: VRCP14PS, CPU Feature: AVX512
func (x Float32x16) Reciprocal() Float32x16

// Reciprocal computes an approximate reciprocal (1/x) of each element of x.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func (x Float64x2) Reciprocal() Float64x2

// Reciprocal computes an approximate reciprocal (1/x) of each element of x.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func (x Float64x4) Reciprocal() Float64x4

// Reciprocal computes an approximate reciprocal (1/x) of each element of x.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func (x Float64x8) Reciprocal() Float64x8

/* ReciprocalSqrt */

// ReciprocalSqrt computes an approximate reciprocal of the square root (1/sqrt(x)) of each element of x.
// This width is implemented with VRSQRTPS, while Float32x16.ReciprocalSqrt uses VRSQRT14PS,
// so the approximation may differ between vector widths.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func (x Float32x4) ReciprocalSqrt() Float32x4

// ReciprocalSqrt computes an approximate reciprocal of the square root (1/sqrt(x)) of each element of x.
// This width is implemented with VRSQRTPS, while Float32x16.ReciprocalSqrt uses VRSQRT14PS,
// so the approximation may differ between vector widths.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func (x Float32x8) ReciprocalSqrt() Float32x8

// ReciprocalSqrt computes an approximate reciprocal of the square root (1/sqrt(x)) of each element of x.
// This width is implemented with VRSQRT14PS, while the Float32x4 and Float32x8 variants use VRSQRTPS,
// so the approximation may differ between vector widths.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512
func (x Float32x16) ReciprocalSqrt() Float32x16

// ReciprocalSqrt computes an approximate reciprocal of the square root (1/sqrt(x)) of each element of x.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func (x Float64x2) ReciprocalSqrt() Float64x2

// ReciprocalSqrt computes an approximate reciprocal of the square root (1/sqrt(x)) of each element of x.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func (x Float64x4) ReciprocalSqrt() Float64x4

// ReciprocalSqrt computes an approximate reciprocal of the square root (1/sqrt(x)) of each element of x.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func (x Float64x8) ReciprocalSqrt() Float64x8

/* RotateAllLeft */

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Int32x4) RotateAllLeft(shift uint8) Int32x4

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Int32x8) RotateAllLeft(shift uint8) Int32x8

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Int32x16) RotateAllLeft(shift uint8) Int32x16

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Int64x2) RotateAllLeft(shift uint8) Int64x2

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Int64x4) RotateAllLeft(shift uint8) Int64x4

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Int64x8) RotateAllLeft(shift uint8) Int64x8

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4

// RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8

/* RotateAllRight */

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Int32x4) RotateAllRight(shift uint8) Int32x4

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Int32x8) RotateAllRight(shift uint8) Int32x8

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Int32x16) RotateAllRight(shift uint8) Int32x16

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Int64x2) RotateAllRight(shift uint8) Int64x2

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Int64x4) RotateAllRight(shift uint8) Int64x4

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Int64x8) RotateAllRight(shift uint8) Int64x8

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4

// RotateAllRight rotates every element of x to the right by the same number of bits, given by shift
// and passed to the instruction as an immediate.
//
// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8

/* RotateLeft */

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVD, CPU Feature: AVX512
func (x Int32x4) RotateLeft(y Int32x4) Int32x4

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVD, CPU Feature: AVX512
func (x Int32x8) RotateLeft(y Int32x8) Int32x8

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVD, CPU Feature: AVX512
func (x Int32x16) RotateLeft(y Int32x16) Int32x16

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func (x Int64x2) RotateLeft(y Int64x2) Int64x2

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func (x Int64x4) RotateLeft(y Int64x4) Int64x4

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func (x Int64x8) RotateLeft(y Int64x8) Int64x8

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVD, CPU Feature: AVX512
func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVD, CPU Feature: AVX512
func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVD, CPU Feature: AVX512
func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4

// RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
//
// Asm: VPROLVQ, CPU Feature: AVX512
func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8

/* RotateRight */

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVD, CPU Feature: AVX512
func (x Int32x4) RotateRight(y Int32x4) Int32x4

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVD, CPU Feature: AVX512
func (x Int32x8) RotateRight(y Int32x8) Int32x8

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVD, CPU Feature: AVX512
func (x Int32x16) RotateRight(y Int32x16) Int32x16

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func (x Int64x2) RotateRight(y Int64x2) Int64x2

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func (x Int64x4) RotateRight(y Int64x4) Int64x4

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func (x Int64x8) RotateRight(y Int64x8) Int64x8

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVD, CPU Feature: AVX512
func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVD, CPU Feature: AVX512
func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVD, CPU Feature: AVX512
func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4

// RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
//
// Asm: VPRORVQ, CPU Feature: AVX512
func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8

/* RoundToEven */

// RoundToEven rounds each element of x to the nearest integer; as the name indicates, ties are rounded to the even integer.
// The rounded values are returned as floating-point elements.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) RoundToEven() Float32x4

// RoundToEven rounds each element of x to the nearest integer; as the name indicates, ties are rounded to the even integer.
// The rounded values are returned as floating-point elements.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) RoundToEven() Float32x8

// RoundToEven rounds each element of x to the nearest integer; as the name indicates, ties are rounded to the even integer.
// The rounded values are returned as floating-point elements.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) RoundToEven() Float64x2

// RoundToEven rounds each element of x to the nearest integer; as the name indicates, ties are rounded to the even integer.
// The rounded values are returned as floating-point elements.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) RoundToEven() Float64x4

/* RoundToEvenScaled */

// RoundToEvenScaled rounds each element of x with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are kept - confirm against VRNDSCALEPS.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x4) RoundToEvenScaled(prec uint8) Float32x4

// RoundToEvenScaled rounds each element of x with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are kept - confirm against VRNDSCALEPS.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x8) RoundToEvenScaled(prec uint8) Float32x8

// RoundToEvenScaled rounds each element of x with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are kept - confirm against VRNDSCALEPS.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x16) RoundToEvenScaled(prec uint8) Float32x16

// RoundToEvenScaled rounds each element of x with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are kept - confirm against VRNDSCALEPD.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x2) RoundToEvenScaled(prec uint8) Float64x2

// RoundToEvenScaled rounds each element of x with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are kept - confirm against VRNDSCALEPD.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4

// RoundToEvenScaled rounds each element of x with the precision specified by prec.
// NOTE(review): prec presumably selects how many binary fraction bits are kept - confirm against VRNDSCALEPD.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8

/* RoundToEvenScaledResidue */

// RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
// NOTE(review): presumably the element minus its rounded value - confirm against VREDUCEPS.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x4) RoundToEvenScaledResidue(prec uint8) Float32x4

// RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
// NOTE(review): presumably the element minus its rounded value - confirm against VREDUCEPS.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x8) RoundToEvenScaledResidue(prec uint8) Float32x8

// RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
// NOTE(review): presumably the element minus its rounded value - confirm against VREDUCEPS.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x16) RoundToEvenScaledResidue(prec uint8) Float32x16

// RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
// NOTE(review): presumably the element minus its rounded value - confirm against VREDUCEPD.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x2) RoundToEvenScaledResidue(prec uint8) Float64x2

// RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
// NOTE(review): presumably the element minus its rounded value - confirm against VREDUCEPD.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4

// RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
// NOTE(review): presumably the element minus its rounded value - confirm against VREDUCEPD.
//
// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8

/* SHA1FourRounds */

// SHA1FourRounds performs 4 rounds of the B loop in the SHA1 algorithm defined in FIPS 180-4.
// x contains the state variables a, b, c and d from upper to lower order.
// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
// result = the state variables a', b', c', d' updated after 4 rounds.
// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds, 2 for the 20 rounds after that,
// and 3 for the last 20 rounds of the loop.
//
// constant results in better performance when it's a constant; a non-constant value will be translated into a jump table.
//
// Asm: SHA1RNDS4, CPU Feature: SHA
func (x Uint32x4) SHA1FourRounds(constant uint8, y Uint32x4) Uint32x4

/* SHA1Message1 */

// SHA1Message1 does the XORing of 1 in the SHA1 algorithm defined in FIPS 180-4.
// x = {W3, W2, W1, W0}
// y = {0, 0, W5, W4}
// result = {W3^W5, W2^W4, W1^W3, W0^W2}
//
// Asm: SHA1MSG1, CPU Feature: SHA
func (x Uint32x4) SHA1Message1(y Uint32x4) Uint32x4

/* SHA1Message2 */

// SHA1Message2 does the calculation of 3 and 4 in the SHA1 algorithm defined in FIPS 180-4.
// x = result of 2
// y = {W15, W14, W13}
// result = {W19, W18, W17, W16}
//
// Asm: SHA1MSG2, CPU Feature: SHA
func (x Uint32x4) SHA1Message2(y Uint32x4) Uint32x4

/* SHA1NextE */

// SHA1NextE calculates the state variable e' updated after 4 rounds in the SHA1 algorithm defined in FIPS 180-4.
// x contains the state variable a (before the 4 rounds), placed in the upper element.
// y holds the elements of the W array for the next 4 rounds, from upper to lower order.
// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
// from upper to lower order.
// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or, better, specify H4:0:0:0
// for y to get e' added to H4. (Note that the value of e' is computed only from x; the values of y don't affect the
// computation of the value of e'.)
//
// Asm: SHA1NEXTE, CPU Feature: SHA
func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4

/* SHA256Message1 */

// SHA256Message1 does the sigma and addition of 1 in the SHA256 algorithm defined in FIPS 180-4.
// x = {W0, W1, W2, W3}
// y = {W4, 0, 0, 0}
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
//
// Asm: SHA256MSG1, CPU Feature: SHA
func (x Uint32x4) SHA256Message1(y Uint32x4) Uint32x4

/* SHA256Message2 */

// SHA256Message2 does the sigma and addition of 3 in the SHA256 algorithm defined in FIPS 180-4.
// x = result of 2
// y = {0, 0, W14, W15}
// result = {W16, W17, W18, W19}
//
// Asm: SHA256MSG2, CPU Feature: SHA
func (x Uint32x4) SHA256Message2(y Uint32x4) Uint32x4

/* SHA256TwoRounds */

// SHA256TwoRounds does 2 rounds of the B loop to calculate updated state variables in the SHA256 algorithm defined in FIPS 180-4.
// x = {h, g, d, c}
// y = {f, e, b, a}
// z = {W0+K0, W1+K1}
// result = {f', e', b', a'}
// The K array is a 64-DWORD constant array defined on page 11 of FIPS 180-4. Each element of the K array is to be added to
// the corresponding element of the W array to make the input data z.
// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
// y (the state variables a, b, e, f before the 2 rounds).
//
// Asm: SHA256RNDS2, CPU Feature: SHA
func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4

/* SaturateToInt8 */

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The 8 results are packed into the low 8 elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x8) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The returned vector has as many elements as x, so the 16 results fill all 16 of its elements.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x16) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The returned vector has as many elements as x, so the 32 results fill all 32 of its elements.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x32) SaturateToInt8() Int8x32

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The 4 results are packed into the low 4 elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x4) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The 8 results are packed into the low 8 elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x8) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The returned vector has as many elements as x, so the 16 results fill all 16 of its elements.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x16) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The 2 results are packed into the low 2 elements of the returned vector; its upper 14 elements are zero-cleared.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x2) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The 4 results are packed into the low 4 elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x4) SaturateToInt8() Int8x16

// SaturateToInt8 converts each element of x to int8.
// Conversion is done with saturation on the vector elements.
// The 8 results are packed into the low 8 elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x8) SaturateToInt8() Int8x16

/* SaturateToInt16 */

// SaturateToInt16 converts each element of x to int16.
// Conversion is done with saturation on the vector elements.
// The 4 results occupy only part of the 8-element returned vector; presumably they are packed into the
// low 4 elements and the upper 4 are zero-cleared, as documented for SaturateToInt8 - TODO confirm.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func (x Int32x4) SaturateToInt16() Int16x8

// SaturateToInt16 converts each element of x to int16.
// Conversion is done with saturation on the vector elements.
// The returned vector has as many elements as x, so the 8 results fill all 8 of its elements.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func (x Int32x8) SaturateToInt16() Int16x8

// SaturateToInt16 converts each element of x to int16.
// Conversion is done with saturation on the vector elements.
// The returned vector has as many elements as x, so the 16 results fill all 16 of its elements.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func (x Int32x16) SaturateToInt16() Int16x16

// SaturateToInt16 converts each element of x to int16.
// Conversion is done with saturation on the vector elements.
// The 2 results occupy only part of the 8-element returned vector; presumably they are packed into the
// low 2 elements and the upper 6 are zero-cleared, as documented for SaturateToInt8 - TODO confirm.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func (x Int64x2) SaturateToInt16() Int16x8

// SaturateToInt16 converts each element of x to int16.
// Conversion is done with saturation on the vector elements.
// The 4 results occupy only part of the 8-element returned vector; presumably they are packed into the
// low 4 elements and the upper 4 are zero-cleared, as documented for SaturateToInt8 - TODO confirm.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func (x Int64x4) SaturateToInt16() Int16x8

// SaturateToInt16 converts each element of x to int16.
// Conversion is done with saturation on the vector elements.
// The returned vector has as many elements as x, so the 8 results fill all 8 of its elements.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func (x Int64x8) SaturateToInt16() Int16x8

/* SaturateToInt16Concat */

// SaturateToInt16Concat converts the element values of x and y to int16.
// With each 128 bits taken as a group:
// the converted group from the first input vector (x) is packed into the lower part of the result vector, and
// the converted group from the second input vector (y) is packed into the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKSSDW, CPU Feature: AVX
func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8

// SaturateToInt16Concat converts the element values of x and y to int16.
// With each 128 bits taken as a group:
// the converted group from the first input vector (x) is packed into the lower part of the result vector, and
// the converted group from the second input vector (y) is packed into the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKSSDW, CPU Feature: AVX2
func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16

// SaturateToInt16Concat converts the element values of x and y to int16.
// With each 128 bits taken as a group:
// the converted group from the first input vector (x) is packed into the lower part of the result vector, and
// the converted group from the second input vector (y) is packed into the upper part of the result vector.
// Conversion is done with saturation on the vector elements.
//
// Asm: VPACKSSDW, CPU Feature: AVX512
func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32

/* SaturateToInt32 */

// SaturateToInt32 narrows each int64 element of x to int32.
// The conversion saturates: a value outside the int32 range is clamped to
// the nearest int32 bound rather than wrapped.
// NOTE(review): x has fewer elements than the result type; presumably the
// converted values fill the low elements and the rest are zero; confirm.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func (x Int64x2) SaturateToInt32() Int32x4

// SaturateToInt32 narrows each int64 element of x to int32.
// The conversion saturates: a value outside the int32 range is clamped to
// the nearest int32 bound rather than wrapped.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func (x Int64x4) SaturateToInt32() Int32x4

// SaturateToInt32 narrows each int64 element of x to int32.
// The conversion saturates: a value outside the int32 range is clamped to
// the nearest int32 bound rather than wrapped.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func (x Int64x8) SaturateToInt32() Int32x8

/* SaturateToUint8 */

// SaturateToUint8 narrows each int16 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSWB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x8) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int16 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSWB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x16) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int32 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSDB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x4) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int32 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSDB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x8) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int32 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSDB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x16) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int64 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSQB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x2) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int64 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSQB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x4) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each int64 element of x to 8 bits with saturation.
// The converted values are packed into the low elements of the result; its
// upper elements are zero-cleared.
// NOTE(review): the name says uint8, yet the result type is Int8x16 and the
// Asm line names VPMOVSQB rather than a VPMOVUS* form; confirm whether signed
// or unsigned saturation is intended.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x8) SaturateToUint8() Int8x16

// SaturateToUint8 narrows each uint16 element of x to uint8.
// The conversion saturates: a value too large for uint8 is clamped to the
// uint8 maximum rather than wrapped.
//
// Asm: VPMOVUSWB, CPU Feature: AVX512
func (x Uint16x32) SaturateToUint8() Uint8x32

/* SaturateToUint16 */

// SaturateToUint16 narrows each uint32 element of x to uint16.
// The conversion saturates: a value too large for uint16 is clamped to the
// uint16 maximum rather than wrapped.
// NOTE(review): x has fewer elements than the result type; presumably the
// converted values fill the low elements and the rest are zero; confirm.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func (x Uint32x4) SaturateToUint16() Uint16x8

// SaturateToUint16 narrows each uint32 element of x to uint16.
// The conversion saturates: a value too large for uint16 is clamped to the
// uint16 maximum rather than wrapped.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func (x Uint32x8) SaturateToUint16() Uint16x8

// SaturateToUint16 narrows each uint32 element of x to uint16.
// The conversion saturates: a value too large for uint16 is clamped to the
// uint16 maximum rather than wrapped.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func (x Uint32x16) SaturateToUint16() Uint16x16

// SaturateToUint16 narrows each uint64 element of x to uint16.
// The conversion saturates: a value too large for uint16 is clamped to the
// uint16 maximum rather than wrapped.
// NOTE(review): x has fewer elements than the result type; presumably the
// converted values fill the low elements and the rest are zero; confirm.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func (x Uint64x2) SaturateToUint16() Uint16x8

// SaturateToUint16 narrows each uint64 element of x to uint16.
// The conversion saturates: a value too large for uint16 is clamped to the
// uint16 maximum rather than wrapped.
// NOTE(review): x has fewer elements than the result type; presumably the
// converted values fill the low elements and the rest are zero; confirm.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func (x Uint64x4) SaturateToUint16() Uint16x8

// SaturateToUint16 narrows each uint64 element of x to uint16.
// The conversion saturates: a value too large for uint16 is clamped to the
// uint16 maximum rather than wrapped.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func (x Uint64x8) SaturateToUint16() Uint16x8

/* SaturateToUint16Concat */

// SaturateToUint16Concat narrows the uint32 elements of x and y to uint16 and
// combines them into one vector.
// The inputs are processed in 128-bit groups: for each group, the converted
// elements of x are packed into the lower part of the result and the
// converted elements of y into the upper part.
// The conversion saturates instead of wrapping.
// NOTE(review): VPACKUSDW is documented as reading its inputs as signed
// 32-bit values; confirm how elements >= 1<<31 are converted.
//
// Asm: VPACKUSDW, CPU Feature: AVX
func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8

// SaturateToUint16Concat narrows the uint32 elements of x and y to uint16 and
// combines them into one vector.
// The inputs are processed in 128-bit groups: for each group, the converted
// elements of x are packed into the lower part and the converted elements of
// y into the upper part.
// NOTE(review): "lower/upper part" presumably refers to the matching 128-bit
// group of the result, not to the whole result vector; confirm.
// The conversion saturates instead of wrapping.
// NOTE(review): VPACKUSDW is documented as reading its inputs as signed
// 32-bit values; confirm how elements >= 1<<31 are converted.
//
// Asm: VPACKUSDW, CPU Feature: AVX2
func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16

// SaturateToUint16Concat narrows the uint32 elements of x and y to uint16 and
// combines them into one vector.
// The inputs are processed in 128-bit groups: for each group, the converted
// elements of x are packed into the lower part and the converted elements of
// y into the upper part.
// NOTE(review): "lower/upper part" presumably refers to the matching 128-bit
// group of the result, not to the whole result vector; confirm.
// The conversion saturates instead of wrapping.
// NOTE(review): VPACKUSDW is documented as reading its inputs as signed
// 32-bit values; confirm how elements >= 1<<31 are converted.
//
// Asm: VPACKUSDW, CPU Feature: AVX512
func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32

/* SaturateToUint32 */

// SaturateToUint32 narrows each uint64 element of x to uint32.
// The conversion saturates: a value too large for uint32 is clamped to the
// uint32 maximum rather than wrapped.
// NOTE(review): x has fewer elements than the result type; presumably the
// converted values fill the low elements and the rest are zero; confirm.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func (x Uint64x2) SaturateToUint32() Uint32x4

// SaturateToUint32 narrows each uint64 element of x to uint32.
// The conversion saturates: a value too large for uint32 is clamped to the
// uint32 maximum rather than wrapped.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func (x Uint64x4) SaturateToUint32() Uint32x4

// SaturateToUint32 narrows each uint64 element of x to uint32.
// The conversion saturates: a value too large for uint32 is clamped to the
// uint32 maximum rather than wrapped.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func (x Uint64x8) SaturateToUint32() Uint32x8

/* Scale */

// Scale multiplies each element of x by a power of 2.
// NOTE(review): the exponent presumably comes from the corresponding element
// of y (VSCALEFPS is documented as x * 2^floor(y)); confirm.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func (x Float32x4) Scale(y Float32x4) Float32x4

// Scale multiplies each element of x by a power of 2.
// NOTE(review): the exponent presumably comes from the corresponding element
// of y (VSCALEFPS is documented as x * 2^floor(y)); confirm.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func (x Float32x8) Scale(y Float32x8) Float32x8

// Scale multiplies each element of x by a power of 2.
// NOTE(review): the exponent presumably comes from the corresponding element
// of y (VSCALEFPS is documented as x * 2^floor(y)); confirm.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func (x Float32x16) Scale(y Float32x16) Float32x16

// Scale multiplies each element of x by a power of 2.
// NOTE(review): the exponent presumably comes from the corresponding element
// of y (VSCALEFPD is documented as x * 2^floor(y)); confirm.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func (x Float64x2) Scale(y Float64x2) Float64x2

// Scale multiplies each element of x by a power of 2.
// NOTE(review): the exponent presumably comes from the corresponding element
// of y (VSCALEFPD is documented as x * 2^floor(y)); confirm.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func (x Float64x4) Scale(y Float64x4) Float64x4

// Scale multiplies each element of x by a power of 2.
// NOTE(review): the exponent presumably comes from the corresponding element
// of y (VSCALEFPD is documented as x * 2^floor(y)); confirm.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func (x Float64x8) Scale(y Float64x8) Float64x8

/* Select128FromPair */

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2F128, CPU Feature: AVX
func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2F128, CPU Feature: AVX
func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
//	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
//
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Int8x32) Select128FromPair(lo, hi uint8, y Int8x32) Int8x32

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
//	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
//
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Int16x16) Select128FromPair(lo, hi uint8, y Int16x16) Int16x16

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
//	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
//
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Uint8x32) Select128FromPair(lo, hi uint8, y Uint8x32) Uint8x32

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
//	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
//
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Uint16x16) Select128FromPair(lo, hi uint8, y Uint16x16) Uint16x16

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8

// Select128FromPair builds a 256-bit vector from two 128-bit halves chosen
// out of x and y. The pair is viewed as a single vector of four 128-bit
// elements, x first and then y; lo selects the element that becomes the lower
// half of the result and hi selects the element that becomes the upper half.
// For example,
//
//	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
//
// Constant lo and hi perform better; non-constant values are translated into a jump table.
// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: VPERM2I128, CPU Feature: AVX2
func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4

/* SetElem */

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Float32x4) SetElem(index uint8, y float32) Float32x4

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Float64x2) SetElem(index uint8, y float64) Float64x2

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRB, CPU Feature: AVX
func (x Int8x16) SetElem(index uint8, y int8) Int8x16

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRW, CPU Feature: AVX
func (x Int16x8) SetElem(index uint8, y int16) Int16x8

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Int32x4) SetElem(index uint8, y int32) Int32x4

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Int64x2) SetElem(index uint8, y int64) Int64x2

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRB, CPU Feature: AVX
func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRW, CPU Feature: AVX
func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4

// SetElem returns a copy of x with the element at position index replaced by y.
//
// A constant index performs better; a non-constant index is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2

/* SetHi */

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTF128, CPU Feature: AVX
func (x Float32x8) SetHi(y Float32x4) Float32x8

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func (x Float32x16) SetHi(y Float32x8) Float32x16

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTF128, CPU Feature: AVX
func (x Float64x4) SetHi(y Float64x2) Float64x4

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func (x Float64x8) SetHi(y Float64x4) Float64x8

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int8x32) SetHi(y Int8x16) Int8x32

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int8x64) SetHi(y Int8x32) Int8x64

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int16x16) SetHi(y Int16x8) Int16x16

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int16x32) SetHi(y Int16x16) Int16x32

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int32x8) SetHi(y Int32x4) Int32x8

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int32x16) SetHi(y Int32x8) Int32x16

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int64x4) SetHi(y Int64x2) Int64x4

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int64x8) SetHi(y Int64x4) Int64x8

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint8x32) SetHi(y Uint8x16) Uint8x32

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint8x64) SetHi(y Uint8x32) Uint8x64

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint16x16) SetHi(y Uint16x8) Uint16x16

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint16x32) SetHi(y Uint16x16) Uint16x32

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint32x8) SetHi(y Uint32x4) Uint32x8

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint32x16) SetHi(y Uint32x8) Uint32x16

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint64x4) SetHi(y Uint64x2) Uint64x4

// SetHi returns a copy of x whose upper half is replaced by y.
// The lower half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint64x8) SetHi(y Uint64x4) Uint64x8

/* SetLo */

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTF128, CPU Feature: AVX
func (x Float32x8) SetLo(y Float32x4) Float32x8

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func (x Float32x16) SetLo(y Float32x8) Float32x16

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTF128, CPU Feature: AVX
func (x Float64x4) SetLo(y Float64x2) Float64x4

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTF64X4, CPU Feature: AVX512
func (x Float64x8) SetLo(y Float64x4) Float64x8

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int8x32) SetLo(y Int8x16) Int8x32

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int8x64) SetLo(y Int8x32) Int8x64

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int16x16) SetLo(y Int16x8) Int16x16

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int16x32) SetLo(y Int16x16) Int16x32

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int32x8) SetLo(y Int32x4) Int32x8

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int32x16) SetLo(y Int32x8) Int32x16

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int64x4) SetLo(y Int64x2) Int64x4

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Int64x8) SetLo(y Int64x4) Int64x8

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint8x32) SetLo(y Uint8x16) Uint8x32

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint8x64) SetLo(y Uint8x32) Uint8x64

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint16x16) SetLo(y Uint16x8) Uint16x16

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint16x32) SetLo(y Uint16x16) Uint16x32

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint32x8) SetLo(y Uint32x4) Uint32x8

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint32x16) SetLo(y Uint32x8) Uint32x16

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint64x4) SetLo(y Uint64x2) Uint64x4

// SetLo returns a copy of x whose lower half is replaced by y.
// The upper half of x is carried over unchanged.
//
// Asm: VINSERTI64X4, CPU Feature: AVX512
func (x Uint64x8) SetLo(y Uint64x4) Uint64x8

/* ShiftAllLeft */

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX
func (x Int16x8) ShiftAllLeft(y uint64) Int16x8

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX2
func (x Int16x16) ShiftAllLeft(y uint64) Int16x16

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512
func (x Int16x32) ShiftAllLeft(y uint64) Int16x32

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX
func (x Int32x4) ShiftAllLeft(y uint64) Int32x4

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX2
func (x Int32x8) ShiftAllLeft(y uint64) Int32x8

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512
func (x Int32x16) ShiftAllLeft(y uint64) Int32x16

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX
func (x Int64x2) ShiftAllLeft(y uint64) Int64x2

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX2
func (x Int64x4) ShiftAllLeft(y uint64) Int64x4

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512
func (x Int64x8) ShiftAllLeft(y uint64) Int64x8

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX
func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX2
func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512
func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX
func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX2
func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512
func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX
func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX2
func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4

// ShiftAllLeft shifts every element of x left by y bits.
// The same count applies to all elements, and the vacated low bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512
func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8

/* ShiftAllLeftConcat */

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits allows counts up to 31, which exceeds the 16-bit
// element width; confirm the effective count range for VPSHLDW.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits allows counts up to 31, which exceeds the 16-bit
// element width; confirm the effective count range for VPSHLDW.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits allows counts up to 31, which exceeds the 16-bit
// element width; confirm the effective count range for VPSHLDW.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits caps the count at 31 although the elements are 64 bits
// wide; confirm whether VPSHLDQ really ignores bit 5 of shift.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits caps the count at 31 although the elements are 64 bits
// wide; confirm whether VPSHLDQ really ignores bit 5 of shift.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits caps the count at 31 although the elements are 64 bits
// wide; confirm whether VPSHLDQ really ignores bit 5 of shift.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits allows counts up to 31, which exceeds the 16-bit
// element width; confirm the effective count range for VPSHLDW.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits allows counts up to 31, which exceeds the 16-bit
// element width; confirm the effective count range for VPSHLDW.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16

// ShiftAllLeftConcat shifts each element of x left by shift bits and fills the
// vacated low bits with the upper bits of y. Only the lower 5 bits of shift are used.
// NOTE(review): 5 bits allows counts up to 31, which exceeds the 16-bit
// element width; confirm the effective count range for VPSHLDW.
//
// A constant shift performs better; a non-constant shift is translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the upper bits of y into the emptied lower bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the upper bits of y into the emptied lower bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the upper bits of y into the emptied lower bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the upper bits of y into the emptied lower bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the upper bits of y into the emptied lower bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4

// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the upper bits of y into the emptied lower bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8

/* ShiftAllRight */

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAW, CPU Feature: AVX
func (x Int16x8) ShiftAllRight(y uint64) Int16x8

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAW, CPU Feature: AVX2
func (x Int16x16) ShiftAllRight(y uint64) Int16x16

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAW, CPU Feature: AVX512
func (x Int16x32) ShiftAllRight(y uint64) Int16x32

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAD, CPU Feature: AVX
func (x Int32x4) ShiftAllRight(y uint64) Int32x4

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAD, CPU Feature: AVX2
func (x Int32x8) ShiftAllRight(y uint64) Int32x8

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAD, CPU Feature: AVX512
func (x Int32x16) ShiftAllRight(y uint64) Int32x16

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func (x Int64x2) ShiftAllRight(y uint64) Int64x2

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func (x Int64x4) ShiftAllRight(y uint64) Int64x4

// ShiftAllRight shifts each element of x to the right by y bits (arithmetic shift).
// Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func (x Int64x8) ShiftAllRight(y uint64) Int64x8

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLW, CPU Feature: AVX
func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLW, CPU Feature: AVX2
func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLW, CPU Feature: AVX512
func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLD, CPU Feature: AVX
func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLD, CPU Feature: AVX2
func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLD, CPU Feature: AVX512
func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLQ, CPU Feature: AVX
func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLQ, CPU Feature: AVX2
func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4

// ShiftAllRight shifts each element of x to the right by y bits (logical shift).
// Emptied upper bits are zeroed.
//
// Asm: VPSRLQ, CPU Feature: AVX512
func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8

/* ShiftAllRightConcat */

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 4 bits are used, i.e. the count is taken modulo 16), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 4 bits are used, i.e. the count is taken modulo 16), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 4 bits are used, i.e. the count is taken modulo 16), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 4 bits are used, i.e. the count is taken modulo 16), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 4 bits are used, i.e. the count is taken modulo 16), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 4 bits are used, i.e. the count is taken modulo 16), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 5 bits are used, i.e. the count is taken modulo 32), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate shift (only the lower 6 bits are used, i.e. the count is taken modulo 64), and then
// copies the lower bits of y into the emptied upper bits of the shifted x.
//
// For best performance shift should be a constant; a non-constant value is translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8

/* ShiftLeft */

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Int16x8) ShiftLeft(y Int16x8) Int16x8

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Int16x16) ShiftLeft(y Int16x16) Int16x16

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Int16x32) ShiftLeft(y Int16x32) Int16x32

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Int32x4) ShiftLeft(y Int32x4) Int32x4

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Int32x8) ShiftLeft(y Int32x8) Int32x8

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512
func (x Int32x16) ShiftLeft(y Int32x16) Int32x16

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Int64x2) ShiftLeft(y Int64x2) Int64x2

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Int64x4) ShiftLeft(y Int64x4) Int64x4

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512
func (x Int64x8) ShiftLeft(y Int64x8) Int64x8

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512
func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4

// ShiftLeft shifts each element of x to the left by the number of bits given by the
// corresponding element of y. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512
func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8

/* ShiftLeftConcat */

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, i.e. the count is taken modulo 64),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, i.e. the count is taken modulo 64),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, i.e. the count is taken modulo 64),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, i.e. the count is taken modulo 64),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, i.e. the count is taken modulo 64),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding element of y (only the lower 6 bits are used, i.e. the count is taken modulo 64),
// and then copies the upper bits of z into the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8

/* ShiftRight */

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512
func (x Int16x8) ShiftRight(y Int16x8) Int16x8

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512
func (x Int16x16) ShiftRight(y Int16x16) Int16x16

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512
func (x Int16x32) ShiftRight(y Int16x32) Int16x32

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX2
func (x Int32x4) ShiftRight(y Int32x4) Int32x4

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX2
func (x Int32x8) ShiftRight(y Int32x8) Int32x8

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512
func (x Int32x16) ShiftRight(y Int32x16) Int32x16

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func (x Int64x2) ShiftRight(y Int64x2) Int64x2

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func (x Int64x4) ShiftRight(y Int64x4) Int64x4

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (arithmetic shift). Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func (x Int64x8) ShiftRight(y Int64x8) Int64x8

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512
func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512
func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512
func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX2
func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX2
func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512
func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4

// ShiftRight shifts each element of x to the right by the number of bits given by the
// corresponding element of y (logical shift). Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512
func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8

/* ShiftRightConcat */

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the lower bits of z into the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the lower bits of z into the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 4 bits are used, i.e. the count is taken modulo 16),
// and then copies the lower bits of z into the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the lower bits of z into the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y (only the lower 5 bits are used, i.e. the count is taken modulo 32),
// and then copies the lower bits of z into the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 64 bits wide, so only the lower 6 bits of each shift count are used.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 64 bits wide, so only the lower 6 bits of each shift count are used.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 64 bits wide, so only the lower 6 bits of each shift count are used.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 16 bits wide, so only the lower 4 bits of each shift count are used.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 16 bits wide, so only the lower 4 bits of each shift count are used.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 16 bits wide, so only the lower 4 bits of each shift count are used.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 32 bits wide, so only the lower 5 bits of each shift count are used.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 32 bits wide, so only the lower 5 bits of each shift count are used.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 32 bits wide, so only the lower 5 bits of each shift count are used.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 64 bits wide, so only the lower 6 bits of each shift count are used.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 64 bits wide, so only the lower 6 bits of each shift count are used.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding element of y, and then copies the lower bits of z to the emptied upper bits of the shifted x.
// Elements are 64 bits wide, so only the lower 6 bits of each shift count are used.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8

/* Sqrt */

// Sqrt computes the square root of each element of x.
//
// Asm: VSQRTPS, CPU Feature: AVX
func (x Float32x4) Sqrt() Float32x4

// Sqrt computes the square root of each element of x.
//
// Asm: VSQRTPS, CPU Feature: AVX
func (x Float32x8) Sqrt() Float32x8

// Sqrt computes the square root of each element of x.
//
// Asm: VSQRTPS, CPU Feature: AVX512
func (x Float32x16) Sqrt() Float32x16

// Sqrt computes the square root of each element of x.
//
// Asm: VSQRTPD, CPU Feature: AVX
func (x Float64x2) Sqrt() Float64x2

// Sqrt computes the square root of each element of x.
//
// Asm: VSQRTPD, CPU Feature: AVX
func (x Float64x4) Sqrt() Float64x4

// Sqrt computes the square root of each element of x.
//
// Asm: VSQRTPD, CPU Feature: AVX512
func (x Float64x8) Sqrt() Float64x8

/* Sub */

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VSUBPS, CPU Feature: AVX
func (x Float32x4) Sub(y Float32x4) Float32x4

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VSUBPS, CPU Feature: AVX
func (x Float32x8) Sub(y Float32x8) Float32x8

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VSUBPS, CPU Feature: AVX512
func (x Float32x16) Sub(y Float32x16) Float32x16

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VSUBPD, CPU Feature: AVX
func (x Float64x2) Sub(y Float64x2) Float64x2

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VSUBPD, CPU Feature: AVX
func (x Float64x4) Sub(y Float64x4) Float64x4

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VSUBPD, CPU Feature: AVX512
func (x Float64x8) Sub(y Float64x8) Float64x8

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBB, CPU Feature: AVX
func (x Int8x16) Sub(y Int8x16) Int8x16

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBB, CPU Feature: AVX2
func (x Int8x32) Sub(y Int8x32) Int8x32

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBB, CPU Feature: AVX512
func (x Int8x64) Sub(y Int8x64) Int8x64

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBW, CPU Feature: AVX
func (x Int16x8) Sub(y Int16x8) Int16x8

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBW, CPU Feature: AVX2
func (x Int16x16) Sub(y Int16x16) Int16x16

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBW, CPU Feature: AVX512
func (x Int16x32) Sub(y Int16x32) Int16x32

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBD, CPU Feature: AVX
func (x Int32x4) Sub(y Int32x4) Int32x4

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBD, CPU Feature: AVX2
func (x Int32x8) Sub(y Int32x8) Int32x8

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBD, CPU Feature: AVX512
func (x Int32x16) Sub(y Int32x16) Int32x16

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBQ, CPU Feature: AVX
func (x Int64x2) Sub(y Int64x2) Int64x2

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBQ, CPU Feature: AVX2
func (x Int64x4) Sub(y Int64x4) Int64x4

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBQ, CPU Feature: AVX512
func (x Int64x8) Sub(y Int64x8) Int64x8

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBB, CPU Feature: AVX
func (x Uint8x16) Sub(y Uint8x16) Uint8x16

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBB, CPU Feature: AVX2
func (x Uint8x32) Sub(y Uint8x32) Uint8x32

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBB, CPU Feature: AVX512
func (x Uint8x64) Sub(y Uint8x64) Uint8x64

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBW, CPU Feature: AVX
func (x Uint16x8) Sub(y Uint16x8) Uint16x8

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBW, CPU Feature: AVX2
func (x Uint16x16) Sub(y Uint16x16) Uint16x16

// Sub subtracts corresponding elements of the two vectors x and y.
// For subtraction with saturation, see SubSaturated.
//
// Asm: VPSUBW, CPU Feature: AVX512
func (x Uint16x32) Sub(y Uint16x32) Uint16x32

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBD, CPU Feature: AVX
func (x Uint32x4) Sub(y Uint32x4) Uint32x4

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBD, CPU Feature: AVX2
func (x Uint32x8) Sub(y Uint32x8) Uint32x8

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBD, CPU Feature: AVX512
func (x Uint32x16) Sub(y Uint32x16) Uint32x16

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBQ, CPU Feature: AVX
func (x Uint64x2) Sub(y Uint64x2) Uint64x2

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBQ, CPU Feature: AVX2
func (x Uint64x4) Sub(y Uint64x4) Uint64x4

// Sub subtracts corresponding elements of the two vectors x and y.
//
// Asm: VPSUBQ, CPU Feature: AVX512
func (x Uint64x8) Sub(y Uint64x8) Uint64x8

/* SubPairs */

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VHSUBPS, CPU Feature: AVX
func (x Float32x4) SubPairs(y Float32x4) Float32x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VHSUBPS, CPU Feature: AVX
func (x Float32x8) SubPairs(y Float32x8) Float32x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VHSUBPD, CPU Feature: AVX
func (x Float64x2) SubPairs(y Float64x2) Float64x2

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VHSUBPD, CPU Feature: AVX
func (x Float64x4) SubPairs(y Float64x4) Float64x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VPHSUBW, CPU Feature: AVX
func (x Int16x8) SubPairs(y Int16x8) Int16x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VPHSUBW, CPU Feature: AVX2
func (x Int16x16) SubPairs(y Int16x16) Int16x16

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VPHSUBD, CPU Feature: AVX
func (x Int32x4) SubPairs(y Int32x4) Int32x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VPHSUBD, CPU Feature: AVX2
func (x Int32x8) SubPairs(y Int32x8) Int32x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VPHSUBW, CPU Feature: AVX
func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VPHSUBW, CPU Feature: AVX2
func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VPHSUBD, CPU Feature: AVX
func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4

// SubPairs horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VPHSUBD, CPU Feature: AVX2
func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8

/* SubPairsSaturated */

// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements; confirm that the y-before-x order above matches the operand mapping.
//
// Asm: VPHSUBSW, CPU Feature: AVX
func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8

// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
// NOTE(review): the Intel SDM places the differences from the instruction's first source operand in
// the low result elements and processes each 128-bit half independently; confirm the order above.
//
// Asm: VPHSUBSW, CPU Feature: AVX2
func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16

/* SubSaturated */

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the int8 range are clamped to that range.
//
// Asm: VPSUBSB, CPU Feature: AVX
func (x Int8x16) SubSaturated(y Int8x16) Int8x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the int8 range are clamped to that range.
//
// Asm: VPSUBSB, CPU Feature: AVX2
func (x Int8x32) SubSaturated(y Int8x32) Int8x32

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the int8 range are clamped to that range.
//
// Asm: VPSUBSB, CPU Feature: AVX512
func (x Int8x64) SubSaturated(y Int8x64) Int8x64

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the int16 range are clamped to that range.
//
// Asm: VPSUBSW, CPU Feature: AVX
func (x Int16x8) SubSaturated(y Int16x8) Int16x8

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the int16 range are clamped to that range.
//
// Asm: VPSUBSW, CPU Feature: AVX2
func (x Int16x16) SubSaturated(y Int16x16) Int16x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the int16 range are clamped to that range.
//
// Asm: VPSUBSW, CPU Feature: AVX512
func (x Int16x32) SubSaturated(y Int16x32) Int16x32

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the uint8 range are clamped to that range.
//
// Asm: VPSUBUSB, CPU Feature: AVX
func (x Uint8x16) SubSaturated(y Uint8x16) Uint8x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the uint8 range are clamped to that range.
//
// Asm: VPSUBUSB, CPU Feature: AVX2
func (x Uint8x32) SubSaturated(y Uint8x32) Uint8x32

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the uint8 range are clamped to that range.
//
// Asm: VPSUBUSB, CPU Feature: AVX512
func (x Uint8x64) SubSaturated(y Uint8x64) Uint8x64

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the uint16 range are clamped to that range.
//
// Asm: VPSUBUSW, CPU Feature: AVX
func (x Uint16x8) SubSaturated(y Uint16x8) Uint16x8

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the uint16 range are clamped to that range.
//
// Asm: VPSUBUSW, CPU Feature: AVX2
func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16

// SubSaturated subtracts corresponding elements of two vectors with saturation.
// Results outside the uint16 range are clamped to that range.
//
// Asm: VPSUBUSW, CPU Feature: AVX512
func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32

/* SumAbsDiff */

// SumAbsDiff sums the absolute differences between the two input vectors, treating each adjacent 8 bytes as a group.
// The result is a vector of word-sized elements in which every 4*n-th element holds the sum for the n-th input group.
// The other elements in the result vector are zeroed.
// Each sum can be viewed as the L1 distance between the corresponding 8-byte groups of the two input vectors.
//
// Asm: VPSADBW, CPU Feature: AVX
func (x Uint8x16) SumAbsDiff(y Uint8x16) Uint16x8

// SumAbsDiff sums the absolute differences between the two input vectors, treating each adjacent 8 bytes as a group.
// The result is a vector of word-sized elements in which every 4*n-th element holds the sum for the n-th input group.
// The other elements in the result vector are zeroed.
// Each sum can be viewed as the L1 distance between the corresponding 8-byte groups of the two input vectors.
//
// Asm: VPSADBW, CPU Feature: AVX2
func (x Uint8x32) SumAbsDiff(y Uint8x32) Uint16x16

// SumAbsDiff sums the absolute differences between the two input vectors, treating each adjacent 8 bytes as a group.
// The result is a vector of word-sized elements in which every 4*n-th element holds the sum for the n-th input group.
// The other elements in the result vector are zeroed.
// Each sum can be viewed as the L1 distance between the corresponding 8-byte groups of the two input vectors.
//
// Asm: VPSADBW, CPU Feature: AVX512
func (x Uint8x64) SumAbsDiff(y Uint8x64) Uint16x32

/* Trunc */

// Trunc truncates each element of x towards zero.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) Trunc() Float32x4

// Trunc truncates each element of x towards zero.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) Trunc() Float32x8

// Trunc truncates each element of x towards zero.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) Trunc() Float64x2

// Trunc truncates each element of x towards zero.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Trunc() Float64x4

/* TruncScaled */

// TruncScaled truncates elements with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x4) TruncScaled(prec uint8) Float32x4

// TruncScaled truncates elements with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x8) TruncScaled(prec uint8) Float32x8

// TruncScaled truncates elements with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512
func (x Float32x16) TruncScaled(prec uint8) Float32x16

// TruncScaled truncates elements with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x2) TruncScaled(prec uint8) Float64x2

// TruncScaled truncates elements with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x4) TruncScaled(prec uint8) Float64x4

// TruncScaled truncates elements with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512
func (x Float64x8) TruncScaled(prec uint8) Float64x8

/* TruncScaledResidue */

// TruncScaledResidue computes the difference after truncating with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x4) TruncScaledResidue(prec uint8) Float32x4

// TruncScaledResidue computes the difference after truncating with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x8) TruncScaledResidue(prec uint8) Float32x8

// TruncScaledResidue computes the difference after truncating with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPS, CPU Feature: AVX512
func (x Float32x16) TruncScaledResidue(prec uint8) Float32x16

// TruncScaledResidue computes the difference after truncating with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x2) TruncScaledResidue(prec uint8) Float64x2

// TruncScaledResidue computes the difference after truncating with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4

// TruncScaledResidue computes the difference after truncating with specified precision.
//
// Passing a constant prec gives better performance; a non-constant value will be translated into a jump table.
//
// Asm: VREDUCEPD, CPU Feature: AVX512
func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8

/* TruncateToInt8 */

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Int16x8) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 16 results fill the entire returned vector.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Int16x16) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 32 results fill the entire returned vector.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Int16x32) TruncateToInt8() Int8x32

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Int32x4) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Int32x8) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 16 results fill the entire returned vector.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Int32x16) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 2 results are packed into the low elements of the returned vector; its upper 14 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Int64x2) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Int64x4) TruncateToInt8() Int8x16

// TruncateToInt8 converts element values to int8.
// Conversion is done with truncation on the vector elements.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Int64x8) TruncateToInt8() Int8x16

/* TruncateToInt16 */

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 4 elements are zero-cleared.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Int32x4) TruncateToInt16() Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements.
// The 8 results fill the entire returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Int32x8) TruncateToInt16() Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements.
// The 16 results fill the entire returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Int32x16) TruncateToInt16() Int16x16

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements.
// The 2 results are packed into the low elements of the returned vector; its upper 6 elements are zero-cleared.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Int64x2) TruncateToInt16() Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 4 elements are zero-cleared.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Int64x4) TruncateToInt16() Int16x8

// TruncateToInt16 converts element values to int16.
// Conversion is done with truncation on the vector elements.
// The 8 results fill the entire returned vector.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Int64x8) TruncateToInt16() Int16x8

/* TruncateToInt32 */

// TruncateToInt32 converts element values to int32.
// Conversion is done with truncation on the vector elements.
// The 2 results are packed into the low elements of the returned vector; its upper 2 elements are zero-cleared.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Int64x2) TruncateToInt32() Int32x4

// TruncateToInt32 converts element values to int32.
// Conversion is done with truncation on the vector elements.
// The 4 results fill the entire returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Int64x4) TruncateToInt32() Int32x4

// TruncateToInt32 converts element values to int32.
// Conversion is done with truncation on the vector elements.
// The 8 results fill the entire returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Int64x8) TruncateToInt32() Int32x8

/* TruncateToUint8 */

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Uint16x8) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 16 results fill the entire returned vector.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Uint16x16) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 32 results fill the entire returned vector.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Uint16x32) TruncateToUint8() Uint8x32

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Uint32x4) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Uint32x8) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 16 results fill the entire returned vector.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Uint32x16) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 2 results are packed into the low elements of the returned vector; its upper 14 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Uint64x2) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 12 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Uint64x4) TruncateToUint8() Uint8x16

// TruncateToUint8 converts element values to uint8.
// Conversion is done with truncation on the vector elements.
// The 8 results are packed into the low elements of the returned vector; its upper 8 elements are zero-cleared.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Uint64x8) TruncateToUint8() Uint8x16

/* TruncateToUint16 */

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 4 elements are zero-cleared.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Uint32x4) TruncateToUint16() Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements.
// The 8 results fill the entire returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Uint32x8) TruncateToUint16() Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements.
// The 16 results fill the entire returned vector.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Uint32x16) TruncateToUint16() Uint16x16

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements.
// The 2 results are packed into the low elements of the returned vector; its upper 6 elements are zero-cleared.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Uint64x2) TruncateToUint16() Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements.
// The 4 results are packed into the low elements of the returned vector; its upper 4 elements are zero-cleared.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Uint64x4) TruncateToUint16() Uint16x8

// TruncateToUint16 converts element values to uint16.
// Conversion is done with truncation on the vector elements.
// The 8 results fill the entire returned vector.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Uint64x8) TruncateToUint16() Uint16x8

/* TruncateToUint32 */

// TruncateToUint32 converts element values to uint32.
// Conversion is done with truncation on the vector elements.
// The 2 results are packed into the low elements of the returned vector; its upper 2 elements are zero-cleared.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Uint64x2) TruncateToUint32() Uint32x4

// TruncateToUint32 converts element values to uint32.
// Conversion is done with truncation on the vector elements.
// The 4 results fill the entire returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Uint64x4) TruncateToUint32() Uint32x4

// TruncateToUint32 converts element values to uint32.
// Conversion is done with truncation on the vector elements.
// The 8 results fill the entire returned vector.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Uint64x8) TruncateToUint32() Uint32x8

/* Xor */

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Int8x16) Xor(y Int8x16) Int8x16

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Int8x32) Xor(y Int8x32) Int8x32

// Xor returns the bitwise XOR of the two vectors x and y.
// VPXORD is the 32-bit-element form of the instruction; XOR acts on each bit independently,
// so the result does not depend on the element width.
//
// Asm: VPXORD, CPU Feature: AVX512
func (x Int8x64) Xor(y Int8x64) Int8x64

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Int16x8) Xor(y Int16x8) Int16x8

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Int16x16) Xor(y Int16x16) Int16x16

// Xor returns the bitwise XOR of the two vectors x and y.
// VPXORD is the 32-bit-element form of the instruction; XOR acts on each bit independently,
// so the result does not depend on the element width.
//
// Asm: VPXORD, CPU Feature: AVX512
func (x Int16x32) Xor(y Int16x32) Int16x32

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Int32x4) Xor(y Int32x4) Int32x4

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Int32x8) Xor(y Int32x8) Int32x8

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXORD, CPU Feature: AVX512
func (x Int32x16) Xor(y Int32x16) Int32x16

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Int64x2) Xor(y Int64x2) Int64x2

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Int64x4) Xor(y Int64x4) Int64x4

// Xor returns the bitwise XOR of the two vectors x and y.
//
// Asm: VPXORQ, CPU Feature: AVX512
func (x Int64x8) Xor(y Int64x8) Int64x8

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Uint8x16) Xor(y Uint8x16) Uint8x16

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Uint8x32) Xor(y Uint8x32) Uint8x32

// Xor returns the bitwise XOR of x and y.
// The listed instruction works on 32-bit lanes; a bitwise XOR yields the same
// bits regardless of lane width, so it also serves 8-bit elements.
//
// Asm: VPXORD, CPU Feature: AVX512
func (x Uint8x64) Xor(y Uint8x64) Uint8x64

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Uint16x8) Xor(y Uint16x8) Uint16x8

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Uint16x16) Xor(y Uint16x16) Uint16x16

// Xor returns the bitwise XOR of x and y.
// The listed instruction works on 32-bit lanes; a bitwise XOR yields the same
// bits regardless of lane width, so it also serves 16-bit elements.
//
// Asm: VPXORD, CPU Feature: AVX512
func (x Uint16x32) Xor(y Uint16x32) Uint16x32

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Uint32x4) Xor(y Uint32x4) Uint32x4

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Uint32x8) Xor(y Uint32x8) Uint32x8

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXORD, CPU Feature: AVX512
func (x Uint32x16) Xor(y Uint32x16) Uint32x16

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX
func (x Uint64x2) Xor(y Uint64x2) Uint64x2

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXOR, CPU Feature: AVX2
func (x Uint64x4) Xor(y Uint64x4) Uint64x4

// Xor returns the bitwise XOR of x and y.
//
// Asm: VPXORQ, CPU Feature: AVX512
func (x Uint64x8) Xor(y Uint64x8) Uint64x8

// Float32x4 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat64x2 converts from Float32x4 to Float64x2.
func (from Float32x4) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Float32x4 to Int8x16.
func (from Float32x4) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Float32x4 to Int16x8.
func (from Float32x4) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Float32x4 to Int32x4.
func (from Float32x4) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Float32x4 to Int64x2.
func (from Float32x4) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Float32x4 to Uint8x16.
func (from Float32x4) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Float32x4 to Uint16x8.
func (from Float32x4) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Float32x4 to Uint32x4.
func (from Float32x4) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Float32x4 to Uint64x2.
func (from Float32x4) AsUint64x2() (to Uint64x2)

// Float32x8 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat64x4 converts from Float32x8 to Float64x4.
func (from Float32x8) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Float32x8 to Int8x32.
func (from Float32x8) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Float32x8 to Int16x16.
func (from Float32x8) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Float32x8 to Int32x8.
func (from Float32x8) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Float32x8 to Int64x4.
func (from Float32x8) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Float32x8 to Uint8x32.
func (from Float32x8) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Float32x8 to Uint16x16.
func (from Float32x8) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Float32x8 to Uint32x8.
func (from Float32x8) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Float32x8 to Uint64x4.
func (from Float32x8) AsUint64x4() (to Uint64x4)

// Float32x16 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat64x8 converts from Float32x16 to Float64x8.
func (from Float32x16) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Float32x16 to Int8x64.
func (from Float32x16) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Float32x16 to Int16x32.
func (from Float32x16) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Float32x16 to Int32x16.
func (from Float32x16) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Float32x16 to Int64x8.
func (from Float32x16) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Float32x16 to Uint8x64.
func (from Float32x16) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Float32x16 to Uint16x32.
func (from Float32x16) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Float32x16 to Uint32x16.
func (from Float32x16) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Float32x16 to Uint64x8.
func (from Float32x16) AsUint64x8() (to Uint64x8)

// Float64x2 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Float64x2 to Float32x4.
func (from Float64x2) AsFloat32x4() (to Float32x4)

// AsInt8x16 converts from Float64x2 to Int8x16.
func (from Float64x2) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Float64x2 to Int16x8.
func (from Float64x2) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Float64x2 to Int32x4.
func (from Float64x2) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Float64x2 to Int64x2.
func (from Float64x2) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Float64x2 to Uint8x16.
func (from Float64x2) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Float64x2 to Uint16x8.
func (from Float64x2) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Float64x2 to Uint32x4.
func (from Float64x2) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Float64x2 to Uint64x2.
func (from Float64x2) AsUint64x2() (to Uint64x2)

// Float64x4 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Float64x4 to Float32x8.
func (from Float64x4) AsFloat32x8() (to Float32x8)

// AsInt8x32 converts from Float64x4 to Int8x32.
func (from Float64x4) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Float64x4 to Int16x16.
func (from Float64x4) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Float64x4 to Int32x8.
func (from Float64x4) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Float64x4 to Int64x4.
func (from Float64x4) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Float64x4 to Uint8x32.
func (from Float64x4) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Float64x4 to Uint16x16.
func (from Float64x4) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Float64x4 to Uint32x8.
func (from Float64x4) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Float64x4 to Uint64x4.
func (from Float64x4) AsUint64x4() (to Uint64x4)

// Float64x8 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Float64x8 to Float32x16.
func (from Float64x8) AsFloat32x16() (to Float32x16)

// AsInt8x64 converts from Float64x8 to Int8x64.
func (from Float64x8) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Float64x8 to Int16x32.
func (from Float64x8) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Float64x8 to Int32x16.
func (from Float64x8) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Float64x8 to Int64x8.
func (from Float64x8) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Float64x8 to Uint8x64.
func (from Float64x8) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Float64x8 to Uint16x32.
func (from Float64x8) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Float64x8 to Uint32x16.
func (from Float64x8) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Float64x8 to Uint64x8.
func (from Float64x8) AsUint64x8() (to Uint64x8)

// Int8x16 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Int8x16 to Float32x4.
func (from Int8x16) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Int8x16 to Float64x2.
func (from Int8x16) AsFloat64x2() (to Float64x2)

// AsInt16x8 converts from Int8x16 to Int16x8.
func (from Int8x16) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Int8x16 to Int32x4.
func (from Int8x16) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Int8x16 to Int64x2.
func (from Int8x16) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Int8x16 to Uint8x16.
func (from Int8x16) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Int8x16 to Uint16x8.
func (from Int8x16) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Int8x16 to Uint32x4.
func (from Int8x16) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Int8x16 to Uint64x2.
func (from Int8x16) AsUint64x2() (to Uint64x2)

// Int8x32 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Int8x32 to Float32x8.
func (from Int8x32) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Int8x32 to Float64x4.
func (from Int8x32) AsFloat64x4() (to Float64x4)

// AsInt16x16 converts from Int8x32 to Int16x16.
func (from Int8x32) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Int8x32 to Int32x8.
func (from Int8x32) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Int8x32 to Int64x4.
func (from Int8x32) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Int8x32 to Uint8x32.
func (from Int8x32) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Int8x32 to Uint16x16.
func (from Int8x32) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Int8x32 to Uint32x8.
func (from Int8x32) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Int8x32 to Uint64x4.
func (from Int8x32) AsUint64x4() (to Uint64x4)

// Int8x64 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Int8x64 to Float32x16.
func (from Int8x64) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Int8x64 to Float64x8.
func (from Int8x64) AsFloat64x8() (to Float64x8)

// AsInt16x32 converts from Int8x64 to Int16x32.
func (from Int8x64) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Int8x64 to Int32x16.
func (from Int8x64) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Int8x64 to Int64x8.
func (from Int8x64) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Int8x64 to Uint8x64.
func (from Int8x64) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Int8x64 to Uint16x32.
func (from Int8x64) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Int8x64 to Uint32x16.
func (from Int8x64) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Int8x64 to Uint64x8.
func (from Int8x64) AsUint64x8() (to Uint64x8)

// Int16x8 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Int16x8 to Float32x4.
func (from Int16x8) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Int16x8 to Float64x2.
func (from Int16x8) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Int16x8 to Int8x16.
func (from Int16x8) AsInt8x16() (to Int8x16)

// AsInt32x4 converts from Int16x8 to Int32x4.
func (from Int16x8) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Int16x8 to Int64x2.
func (from Int16x8) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Int16x8 to Uint8x16.
func (from Int16x8) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Int16x8 to Uint16x8.
func (from Int16x8) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Int16x8 to Uint32x4.
func (from Int16x8) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Int16x8 to Uint64x2.
func (from Int16x8) AsUint64x2() (to Uint64x2)

// Int16x16 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Int16x16 to Float32x8.
func (from Int16x16) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Int16x16 to Float64x4.
func (from Int16x16) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Int16x16 to Int8x32.
func (from Int16x16) AsInt8x32() (to Int8x32)

// AsInt32x8 converts from Int16x16 to Int32x8.
func (from Int16x16) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Int16x16 to Int64x4.
func (from Int16x16) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Int16x16 to Uint8x32.
func (from Int16x16) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Int16x16 to Uint16x16.
func (from Int16x16) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Int16x16 to Uint32x8.
func (from Int16x16) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Int16x16 to Uint64x4.
func (from Int16x16) AsUint64x4() (to Uint64x4)

// Int16x32 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Int16x32 to Float32x16.
func (from Int16x32) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Int16x32 to Float64x8.
func (from Int16x32) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Int16x32 to Int8x64.
func (from Int16x32) AsInt8x64() (to Int8x64)

// AsInt32x16 converts from Int16x32 to Int32x16.
func (from Int16x32) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Int16x32 to Int64x8.
func (from Int16x32) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Int16x32 to Uint8x64.
func (from Int16x32) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Int16x32 to Uint16x32.
func (from Int16x32) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Int16x32 to Uint32x16.
func (from Int16x32) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Int16x32 to Uint64x8.
func (from Int16x32) AsUint64x8() (to Uint64x8)

// Int32x4 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Int32x4 to Float32x4.
func (from Int32x4) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Int32x4 to Float64x2.
func (from Int32x4) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Int32x4 to Int8x16.
func (from Int32x4) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Int32x4 to Int16x8.
func (from Int32x4) AsInt16x8() (to Int16x8)

// AsInt64x2 converts from Int32x4 to Int64x2.
func (from Int32x4) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Int32x4 to Uint8x16.
func (from Int32x4) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Int32x4 to Uint16x8.
func (from Int32x4) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Int32x4 to Uint32x4.
func (from Int32x4) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Int32x4 to Uint64x2.
func (from Int32x4) AsUint64x2() (to Uint64x2)

// Int32x8 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Int32x8 to Float32x8.
func (from Int32x8) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Int32x8 to Float64x4.
func (from Int32x8) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Int32x8 to Int8x32.
func (from Int32x8) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Int32x8 to Int16x16.
func (from Int32x8) AsInt16x16() (to Int16x16)

// AsInt64x4 converts from Int32x8 to Int64x4.
func (from Int32x8) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Int32x8 to Uint8x32.
func (from Int32x8) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Int32x8 to Uint16x16.
func (from Int32x8) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Int32x8 to Uint32x8.
func (from Int32x8) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Int32x8 to Uint64x4.
func (from Int32x8) AsUint64x4() (to Uint64x4)

// Int32x16 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Int32x16 to Float32x16.
func (from Int32x16) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Int32x16 to Float64x8.
func (from Int32x16) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Int32x16 to Int8x64.
func (from Int32x16) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Int32x16 to Int16x32.
func (from Int32x16) AsInt16x32() (to Int16x32)

// AsInt64x8 converts from Int32x16 to Int64x8.
func (from Int32x16) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Int32x16 to Uint8x64.
func (from Int32x16) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Int32x16 to Uint16x32.
func (from Int32x16) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Int32x16 to Uint32x16.
func (from Int32x16) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Int32x16 to Uint64x8.
func (from Int32x16) AsUint64x8() (to Uint64x8)

// Int64x2 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Int64x2 to Float32x4.
func (from Int64x2) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Int64x2 to Float64x2.
func (from Int64x2) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Int64x2 to Int8x16.
func (from Int64x2) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Int64x2 to Int16x8.
func (from Int64x2) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Int64x2 to Int32x4.
func (from Int64x2) AsInt32x4() (to Int32x4)

// AsUint8x16 converts from Int64x2 to Uint8x16.
func (from Int64x2) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Int64x2 to Uint16x8.
func (from Int64x2) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Int64x2 to Uint32x4.
func (from Int64x2) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Int64x2 to Uint64x2.
func (from Int64x2) AsUint64x2() (to Uint64x2)

// Int64x4 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Int64x4 to Float32x8.
func (from Int64x4) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Int64x4 to Float64x4.
func (from Int64x4) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Int64x4 to Int8x32.
func (from Int64x4) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Int64x4 to Int16x16.
func (from Int64x4) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Int64x4 to Int32x8.
func (from Int64x4) AsInt32x8() (to Int32x8)

// AsUint8x32 converts from Int64x4 to Uint8x32.
func (from Int64x4) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Int64x4 to Uint16x16.
func (from Int64x4) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Int64x4 to Uint32x8.
func (from Int64x4) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Int64x4 to Uint64x4.
func (from Int64x4) AsUint64x4() (to Uint64x4)

// Int64x8 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Int64x8 to Float32x16.
func (from Int64x8) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Int64x8 to Float64x8.
func (from Int64x8) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Int64x8 to Int8x64.
func (from Int64x8) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Int64x8 to Int16x32.
func (from Int64x8) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Int64x8 to Int32x16.
func (from Int64x8) AsInt32x16() (to Int32x16)

// AsUint8x64 converts from Int64x8 to Uint8x64.
func (from Int64x8) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Int64x8 to Uint16x32.
func (from Int64x8) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Int64x8 to Uint32x16.
func (from Int64x8) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Int64x8 to Uint64x8.
func (from Int64x8) AsUint64x8() (to Uint64x8)

// Uint8x16 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Uint8x16 to Float32x4.
func (from Uint8x16) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Uint8x16 to Float64x2.
func (from Uint8x16) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Uint8x16 to Int8x16.
func (from Uint8x16) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Uint8x16 to Int16x8.
func (from Uint8x16) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Uint8x16 to Int32x4.
func (from Uint8x16) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Uint8x16 to Int64x2.
func (from Uint8x16) AsInt64x2() (to Int64x2)

// AsUint16x8 converts from Uint8x16 to Uint16x8.
func (from Uint8x16) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Uint8x16 to Uint32x4.
func (from Uint8x16) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Uint8x16 to Uint64x2.
func (from Uint8x16) AsUint64x2() (to Uint64x2)

// Uint8x32 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Uint8x32 to Float32x8.
func (from Uint8x32) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Uint8x32 to Float64x4.
func (from Uint8x32) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Uint8x32 to Int8x32.
func (from Uint8x32) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Uint8x32 to Int16x16.
func (from Uint8x32) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Uint8x32 to Int32x8.
func (from Uint8x32) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Uint8x32 to Int64x4.
func (from Uint8x32) AsInt64x4() (to Int64x4)

// AsUint16x16 converts from Uint8x32 to Uint16x16.
func (from Uint8x32) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Uint8x32 to Uint32x8.
func (from Uint8x32) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Uint8x32 to Uint64x4.
func (from Uint8x32) AsUint64x4() (to Uint64x4)

// Uint8x64 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Uint8x64 to Float32x16.
func (from Uint8x64) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Uint8x64 to Float64x8.
func (from Uint8x64) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Uint8x64 to Int8x64.
func (from Uint8x64) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Uint8x64 to Int16x32.
func (from Uint8x64) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Uint8x64 to Int32x16.
func (from Uint8x64) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Uint8x64 to Int64x8.
func (from Uint8x64) AsInt64x8() (to Int64x8)

// AsUint16x32 converts from Uint8x64 to Uint16x32.
func (from Uint8x64) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Uint8x64 to Uint32x16.
func (from Uint8x64) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Uint8x64 to Uint64x8.
func (from Uint8x64) AsUint64x8() (to Uint64x8)

// Uint16x8 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Uint16x8 to Float32x4.
func (from Uint16x8) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Uint16x8 to Float64x2.
func (from Uint16x8) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Uint16x8 to Int8x16.
func (from Uint16x8) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Uint16x8 to Int16x8.
func (from Uint16x8) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Uint16x8 to Int32x4.
func (from Uint16x8) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Uint16x8 to Int64x2.
func (from Uint16x8) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Uint16x8 to Uint8x16.
func (from Uint16x8) AsUint8x16() (to Uint8x16)

// AsUint32x4 converts from Uint16x8 to Uint32x4.
func (from Uint16x8) AsUint32x4() (to Uint32x4)

// AsUint64x2 converts from Uint16x8 to Uint64x2.
func (from Uint16x8) AsUint64x2() (to Uint64x2)

// Uint16x16 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Uint16x16 to Float32x8.
func (from Uint16x16) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Uint16x16 to Float64x4.
func (from Uint16x16) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Uint16x16 to Int8x32.
func (from Uint16x16) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Uint16x16 to Int16x16.
func (from Uint16x16) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Uint16x16 to Int32x8.
func (from Uint16x16) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Uint16x16 to Int64x4.
func (from Uint16x16) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Uint16x16 to Uint8x32.
func (from Uint16x16) AsUint8x32() (to Uint8x32)

// AsUint32x8 converts from Uint16x16 to Uint32x8.
func (from Uint16x16) AsUint32x8() (to Uint32x8)

// AsUint64x4 converts from Uint16x16 to Uint64x4.
func (from Uint16x16) AsUint64x4() (to Uint64x4)

// Uint16x32 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Uint16x32 to Float32x16.
func (from Uint16x32) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Uint16x32 to Float64x8.
func (from Uint16x32) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Uint16x32 to Int8x64.
func (from Uint16x32) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Uint16x32 to Int16x32.
func (from Uint16x32) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Uint16x32 to Int32x16.
func (from Uint16x32) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Uint16x32 to Int64x8.
func (from Uint16x32) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Uint16x32 to Uint8x64.
func (from Uint16x32) AsUint8x64() (to Uint8x64)

// AsUint32x16 converts from Uint16x32 to Uint32x16.
func (from Uint16x32) AsUint32x16() (to Uint32x16)

// AsUint64x8 converts from Uint16x32 to Uint64x8.
func (from Uint16x32) AsUint64x8() (to Uint64x8)

// Uint32x4 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Uint32x4 to Float32x4.
func (from Uint32x4) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Uint32x4 to Float64x2.
func (from Uint32x4) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Uint32x4 to Int8x16.
func (from Uint32x4) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Uint32x4 to Int16x8.
func (from Uint32x4) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Uint32x4 to Int32x4.
func (from Uint32x4) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Uint32x4 to Int64x2.
func (from Uint32x4) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Uint32x4 to Uint8x16.
func (from Uint32x4) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Uint32x4 to Uint16x8.
func (from Uint32x4) AsUint16x8() (to Uint16x8)

// AsUint64x2 converts from Uint32x4 to Uint64x2.
func (from Uint32x4) AsUint64x2() (to Uint64x2)

// Uint32x8 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Uint32x8 to Float32x8.
func (from Uint32x8) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Uint32x8 to Float64x4.
func (from Uint32x8) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Uint32x8 to Int8x32.
func (from Uint32x8) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Uint32x8 to Int16x16.
func (from Uint32x8) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Uint32x8 to Int32x8.
func (from Uint32x8) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Uint32x8 to Int64x4.
func (from Uint32x8) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Uint32x8 to Uint8x32.
func (from Uint32x8) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Uint32x8 to Uint16x16.
func (from Uint32x8) AsUint16x16() (to Uint16x16)

// AsUint64x4 converts from Uint32x8 to Uint64x4.
func (from Uint32x8) AsUint64x4() (to Uint64x4)

// Uint32x16 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Uint32x16 to Float32x16.
func (from Uint32x16) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Uint32x16 to Float64x8.
func (from Uint32x16) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Uint32x16 to Int8x64.
func (from Uint32x16) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Uint32x16 to Int16x32.
func (from Uint32x16) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Uint32x16 to Int32x16.
func (from Uint32x16) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Uint32x16 to Int64x8.
func (from Uint32x16) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Uint32x16 to Uint8x64.
func (from Uint32x16) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Uint32x16 to Uint16x32.
func (from Uint32x16) AsUint16x32() (to Uint16x32)

// AsUint64x8 converts from Uint32x16 to Uint64x8.
func (from Uint32x16) AsUint64x8() (to Uint64x8)

// Uint64x2 conversions (targets are the other types named as 128-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x4 converts from Uint64x2 to Float32x4.
func (from Uint64x2) AsFloat32x4() (to Float32x4)

// AsFloat64x2 converts from Uint64x2 to Float64x2.
func (from Uint64x2) AsFloat64x2() (to Float64x2)

// AsInt8x16 converts from Uint64x2 to Int8x16.
func (from Uint64x2) AsInt8x16() (to Int8x16)

// AsInt16x8 converts from Uint64x2 to Int16x8.
func (from Uint64x2) AsInt16x8() (to Int16x8)

// AsInt32x4 converts from Uint64x2 to Int32x4.
func (from Uint64x2) AsInt32x4() (to Int32x4)

// AsInt64x2 converts from Uint64x2 to Int64x2.
func (from Uint64x2) AsInt64x2() (to Int64x2)

// AsUint8x16 converts from Uint64x2 to Uint8x16.
func (from Uint64x2) AsUint8x16() (to Uint8x16)

// AsUint16x8 converts from Uint64x2 to Uint16x8.
func (from Uint64x2) AsUint16x8() (to Uint16x8)

// AsUint32x4 converts from Uint64x2 to Uint32x4.
func (from Uint64x2) AsUint32x4() (to Uint32x4)

// Uint64x4 conversions (targets are the other types named as 256-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x8 converts from Uint64x4 to Float32x8.
func (from Uint64x4) AsFloat32x8() (to Float32x8)

// AsFloat64x4 converts from Uint64x4 to Float64x4.
func (from Uint64x4) AsFloat64x4() (to Float64x4)

// AsInt8x32 converts from Uint64x4 to Int8x32.
func (from Uint64x4) AsInt8x32() (to Int8x32)

// AsInt16x16 converts from Uint64x4 to Int16x16.
func (from Uint64x4) AsInt16x16() (to Int16x16)

// AsInt32x8 converts from Uint64x4 to Int32x8.
func (from Uint64x4) AsInt32x8() (to Int32x8)

// AsInt64x4 converts from Uint64x4 to Int64x4.
func (from Uint64x4) AsInt64x4() (to Int64x4)

// AsUint8x32 converts from Uint64x4 to Uint8x32.
func (from Uint64x4) AsUint8x32() (to Uint8x32)

// AsUint16x16 converts from Uint64x4 to Uint16x16.
func (from Uint64x4) AsUint16x16() (to Uint16x16)

// AsUint32x8 converts from Uint64x4 to Uint32x8.
func (from Uint64x4) AsUint32x8() (to Uint32x8)

// Uint64x8 conversions (targets are the other types named as 512-bit vectors).
// NOTE(review): no Asm is listed; presumably bit reinterpretations, not numeric conversions - confirm.

// AsFloat32x16 converts from Uint64x8 to Float32x16.
func (from Uint64x8) AsFloat32x16() (to Float32x16)

// AsFloat64x8 converts from Uint64x8 to Float64x8.
func (from Uint64x8) AsFloat64x8() (to Float64x8)

// AsInt8x64 converts from Uint64x8 to Int8x64.
func (from Uint64x8) AsInt8x64() (to Int8x64)

// AsInt16x32 converts from Uint64x8 to Int16x32.
func (from Uint64x8) AsInt16x32() (to Int16x32)

// AsInt32x16 converts from Uint64x8 to Int32x16.
func (from Uint64x8) AsInt32x16() (to Int32x16)

// AsInt64x8 converts from Uint64x8 to Int64x8.
func (from Uint64x8) AsInt64x8() (to Int64x8)

// AsUint8x64 converts from Uint64x8 to Uint8x64.
func (from Uint64x8) AsUint8x64() (to Uint8x64)

// AsUint16x32 converts from Uint64x8 to Uint16x32.
func (from Uint64x8) AsUint16x32() (to Uint16x32)

// AsUint32x16 converts from Uint64x8 to Uint32x16.
func (from Uint64x8) AsUint32x16() (to Uint32x16)

// ToInt8x16 converts from Mask8x16 to Int8x16.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask8x16) ToInt8x16() (to Int8x16)

// asMask converts from Int8x16 to Mask8x16.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int8x16) asMask() (to Mask8x16)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask8x16) And(y Mask8x16) Mask8x16

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask8x16) Or(y Mask8x16) Mask8x16

// ToInt8x32 converts from Mask8x32 to Int8x32.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask8x32) ToInt8x32() (to Int8x32)

// asMask converts from Int8x32 to Mask8x32.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int8x32) asMask() (to Mask8x32)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask8x32) And(y Mask8x32) Mask8x32

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask8x32) Or(y Mask8x32) Mask8x32

// ToInt8x64 converts from Mask8x64 to Int8x64.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask8x64) ToInt8x64() (to Int8x64)

// asMask converts from Int8x64 to Mask8x64.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int8x64) asMask() (to Mask8x64)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask8x64) And(y Mask8x64) Mask8x64

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask8x64) Or(y Mask8x64) Mask8x64

// ToInt16x8 converts from Mask16x8 to Int16x8.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask16x8) ToInt16x8() (to Int16x8)

// asMask converts from Int16x8 to Mask16x8.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int16x8) asMask() (to Mask16x8)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask16x8) And(y Mask16x8) Mask16x8

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask16x8) Or(y Mask16x8) Mask16x8

// ToInt16x16 converts from Mask16x16 to Int16x16.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask16x16) ToInt16x16() (to Int16x16)

// asMask converts from Int16x16 to Mask16x16.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int16x16) asMask() (to Mask16x16)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask16x16) And(y Mask16x16) Mask16x16

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask16x16) Or(y Mask16x16) Mask16x16

// ToInt16x32 converts from Mask16x32 to Int16x32.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask16x32) ToInt16x32() (to Int16x32)

// asMask converts from Int16x32 to Mask16x32.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int16x32) asMask() (to Mask16x32)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask16x32) And(y Mask16x32) Mask16x32

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask16x32) Or(y Mask16x32) Mask16x32

// ToInt32x4 converts from Mask32x4 to Int32x4.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask32x4) ToInt32x4() (to Int32x4)

// asMask converts from Int32x4 to Mask32x4.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int32x4) asMask() (to Mask32x4)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask32x4) And(y Mask32x4) Mask32x4

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask32x4) Or(y Mask32x4) Mask32x4

// ToInt32x8 converts from Mask32x8 to Int32x8.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask32x8) ToInt32x8() (to Int32x8)

// asMask converts from Int32x8 to Mask32x8.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int32x8) asMask() (to Mask32x8)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask32x8) And(y Mask32x8) Mask32x8

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask32x8) Or(y Mask32x8) Mask32x8

// ToInt32x16 converts from Mask32x16 to Int32x16.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask32x16) ToInt32x16() (to Int32x16)

// asMask converts from Int32x16 to Mask32x16.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int32x16) asMask() (to Mask32x16)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask32x16) And(y Mask32x16) Mask32x16

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask32x16) Or(y Mask32x16) Mask32x16

// ToInt64x2 converts from Mask64x2 to Int64x2.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask64x2) ToInt64x2() (to Int64x2)

// asMask converts from Int64x2 to Mask64x2.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int64x2) asMask() (to Mask64x2)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask64x2) And(y Mask64x2) Mask64x2

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask64x2) Or(y Mask64x2) Mask64x2

// ToInt64x4 converts from Mask64x4 to Int64x4.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask64x4) ToInt64x4() (to Int64x4)

// asMask converts from Int64x4 to Mask64x4.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int64x4) asMask() (to Mask64x4)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask64x4) And(y Mask64x4) Mask64x4

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask64x4) Or(y Mask64x4) Mask64x4

// ToInt64x8 converts from Mask64x8 to Int64x8.
// NOTE(review): the integer value produced for a set or unset lane is not visible here - confirm.
func (from Mask64x8) ToInt64x8() (to Int64x8)

// asMask converts from Int64x8 to Mask64x8.
// NOTE(review): which lane values count as "set" is not visible here - confirm.
func (from Int64x8) asMask() (to Mask64x8)

// And combines the masks x and y.
// NOTE(review): presumably the lane-wise logical AND - confirm.
func (x Mask64x8) And(y Mask64x8) Mask64x8

// Or combines the masks x and y.
// NOTE(review): presumably the lane-wise logical OR - confirm.
func (x Mask64x8) Or(y Mask64x8) Mask64x8


The pages are generated with Golds v0.8.3-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.