// Copyright 2025 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.//go:build goexperiment.simdpackage archsimdimport// Implementation of all the {Int,Uint}{8,16} load and store slice part// functions and methods for 128-bit and 256-bit vectors./* pointer-punning functions for chunked slice part loads. */func int16atP8( *int8) *int16 {return (*int16)(unsafe.Pointer())}func int32atP8( *int8) *int32 {return (*int32)(unsafe.Pointer())}func int64atP8( *int8) *int64 {return (*int64)(unsafe.Pointer())}func int32atP16( *int16) *int32 {return (*int32)(unsafe.Pointer())}func int64atP16( *int16) *int64 {return (*int64)(unsafe.Pointer())}func int64atP32( *int32) *int64 {return (*int64)(unsafe.Pointer())}func int32atP64( *int64) *int32 {return (*int32)(unsafe.Pointer())}/* These two masks are used by generated code */var vecMask64 = [16]int64{ -1, -1, -1, -1, -1, -1, -1, -1,0, 0, 0, 0,0, 0, 0, 0,}var vecMask32 = [32]int32{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,0, 0, 0, 0,0, 0, 0, 0,0, 0, 0, 0,0, 0, 0, 0,}/* 256-bit int vector loads and stores made from 128-bit parts */// LoadInt8x32SlicePart loads a Int8x32 from the slice s.// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes.// If s has 32 or more elements, the function is equivalent to LoadInt8x32Slice.func ( []int8) Int8x32 { := len()if >= 32 {returnLoadInt8x32Slice() }varInt8x32if == 0 {return }if > 16 {return .SetLo(LoadInt8x16Slice()).SetHi(LoadInt8x16SlicePart([16:])) } else {return .SetLo(LoadInt8x16SlicePart()) }}// LoadInt16x16SlicePart loads a Int16x16 from the slice s.// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.// If s has 16 or more elements, the function is equivalent to LoadInt16x16Slice.func ( []int16) Int16x16 { := len()if >= 16 {returnLoadInt16x16Slice() }varInt16x16if == 0 {return }if > 8 {return 
.SetLo(LoadInt16x8Slice()).SetHi(LoadInt16x8SlicePart([8:])) } else {return .SetLo(LoadInt16x8SlicePart()) }}// StoreSlicePart stores the elements of x into the slice s.// It stores as many elements as will fit in s.// If s has 32 or more elements, the method is equivalent to x.StoreSlice.func ( Int8x32) ( []int8) { := len()if >= 32 { .StoreSlice()return }if == 0 {return }if > 16 { .GetLo().StoreSlice() .GetHi().StoreSlicePart([16:]) } else { // fits in one .GetLo().StoreSlicePart() }}// StoreSlicePart stores the elements of x into the slice s.// It stores as many elements as will fit in s.// If s has 16 or more elements, the method is equivalent to x.StoreSlice.func ( Int16x16) ( []int16) { := len()if >= 16 { .StoreSlice()return }if == 0 {return }if > 8 { .GetLo().StoreSlice() .GetHi().StoreSlicePart([8:]) } else { // fits in one .GetLo().StoreSlicePart() }}/* 128-bit vector load and store slice parts for 8 and 16-bit int elements */// LoadInt8x16SlicePart loads a Int8x16 from the slice s.// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.// If s has 16 or more elements, the function is equivalent to LoadInt8x16Slice.func ( []int8) Int8x16 { := len()if >= 16 {returnLoadInt8x16Slice() }varInt8x16if == 0 {return }if >= 8 { // 8-15 = .AsInt64x2().SetElem(0, *int64atP8(&[0])).AsInt8x16()if >= 12 { // 12, 13, 14, 15 = .AsInt32x4().SetElem(8/4, *int32atP8(&[8])).AsInt8x16()if >= 14 { = .AsInt16x8().SetElem(12/2, *int16atP8(&[12])).AsInt8x16()if == 15 { = .SetElem(14, [14]) } } elseif == 13 { = .SetElem(12, [12]) } } elseif >= 10 { // 10, 11 = .AsInt16x8().SetElem(8/2, *int16atP8(&[8])).AsInt8x16()if == 11 { = .SetElem(10, [10]) } } elseif == 9 { = .SetElem(8, [8]) } } elseif >= 4 { // 4-7 = .AsInt32x4().SetElem(0, *int32atP8(&[0])).AsInt8x16()if >= 6 { = .AsInt16x8().SetElem(4/2, *int16atP8(&[4])).AsInt8x16()if == 7 { = .SetElem(6, [6]) } } elseif == 5 { = .SetElem(4, [4]) } } elseif >= 2 { // 2,3 = .AsInt16x8().SetElem(0, 
*int16atP8(&[0])).AsInt8x16()if == 3 { = .SetElem(2, [2]) } } else { // l == 1 = .SetElem(0, [0]) }return}// StoreSlicePart stores the elements of x into the slice s.// It stores as many elements as will fit in s.// If s has 16 or more elements, the method is equivalent to x.StoreSlice.func ( Int8x16) ( []int8) { := len()if >= 16 { .StoreSlice()return }if == 0 {return }if >= 8 { // 8-15 *int64atP8(&[0]) = .AsInt64x2().GetElem(0)if >= 12 { // 12, 13, 14, 15 *int32atP8(&[8]) = .AsInt32x4().GetElem(8 / 4)if >= 14 { *int16atP8(&[12]) = .AsInt16x8().GetElem(12 / 2)if == 15 { [14] = .GetElem(14) } } elseif == 13 { [12] = .GetElem(12) } } elseif >= 10 { // 10, 11 *int16atP8(&[8]) = .AsInt16x8().GetElem(8 / 2)if == 11 { [10] = .GetElem(10) } } elseif == 9 { [8] = .GetElem(8) } } elseif >= 4 { // 4-7 *int32atP8(&[0]) = .AsInt32x4().GetElem(0)if >= 6 { *int16atP8(&[4]) = .AsInt16x8().GetElem(4 / 2)if == 7 { [6] = .GetElem(6) } } elseif == 5 { [4] = .GetElem(4) } } elseif >= 2 { // 2,3 *int16atP8(&[0]) = .AsInt16x8().GetElem(0)if == 3 { [2] = .GetElem(2) } } else { // l == 1 [0] = .GetElem(0) }}// LoadInt16x8SlicePart loads a Int16x8 from the slice s.// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.// If s has 8 or more elements, the function is equivalent to LoadInt16x8Slice.func ( []int16) Int16x8 { := len()if >= 8 {returnLoadInt16x8Slice() }varInt16x8if == 0 {return }if >= 4 { // 4-7 = .AsInt64x2().SetElem(0, *int64atP16(&[0])).AsInt16x8()if >= 6 { = .AsInt32x4().SetElem(4/2, *int32atP16(&[4])).AsInt16x8()if == 7 { = .SetElem(6, [6]) } } elseif == 5 { = .SetElem(4, [4]) } } elseif >= 2 { // 2,3 = .AsInt32x4().SetElem(0, *int32atP16(&[0])).AsInt16x8()if == 3 { = .SetElem(2, [2]) } } else { // l == 1 = .SetElem(0, [0]) }return}// StoreSlicePart stores the elements of x into the slice s.// It stores as many elements as will fit in s.// If s has 8 or more elements, the method is equivalent to x.StoreSlice.func ( Int16x8) ( []int16) 
{ := len()if >= 8 { .StoreSlice()return }if == 0 {return }if >= 4 { // 4-7 *int64atP16(&[0]) = .AsInt64x2().GetElem(0)if >= 6 { *int32atP16(&[4]) = .AsInt32x4().GetElem(4 / 2)if == 7 { [6] = .GetElem(6) } } elseif == 5 { [4] = .GetElem(4) } } elseif >= 2 { // 2,3 *int32atP16(&[0]) = .AsInt32x4().GetElem(0)if == 3 { [2] = .GetElem(2) } } else { // l == 1 [0] = .GetElem(0) }return}
The pages are generated with Golds v0.8.3-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.