// Copyright 2025 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package asmgen// shiftVU generates lshVU and rshVU, which do// z, c = x << s and z, c = x >> s, for 0 < s < _W.func shiftVU( *Asm, string) {// Because these routines can be called for z.Lsh(z, N) and z.Rsh(z, N), // the input and output slices may be aliased at different offsets. // For example (on 64-bit systems), during z.Lsh(z, 65), &z[0] == &x[1], // and during z.Rsh(z, 65), &z[1] == &x[0]. // For left shift, we must process the slices from len(z)-1 down to 0, // so that we don't overwrite a word before we need to read it. // For right shift, we must process the slices from 0 up to len(z)-1. // The different traversals at least make the two cases more consistent, // since we're always delaying the output by one word compared // to the input. := .Func("func " + + "(z, x []Word, s uint) (c Word)")// Check for no input early, since we need to start by reading 1 word. := .Arg("z_len") .JmpZero(, "ret0")// Start loop by reading first input word. := .ArgHint("s", HintShiftCount) := .Pipe()if == "lshVU" { .SetBackward() } := []int{1, 4}if .Arch == Arch386 { = []int{1} // too few registers for more .SetUseIndexCounter() } .LoadPtrs() .Comment("shift first word into carry") := .LoadN(1)[0][0]// Decide how to shift. On systems with a wide shift (x86), use that. // Otherwise, we need shift by s and negative (reverse) shift by 64-s or 32-s. := .Lsh := .LshWide := .Rsh := .RshRegif == "rshVU" { = .Rsh = .RshWide = .Lsh = .LshReg }if .Arch.HasShiftWide() {// Use wide shift to avoid needing negative shifts. // The invariant is that prev holds the previous word (not shifted at all), // to be used as input into the wide shift. // After the loop finishes, prev holds the final output word to be written. := .Reg() (, , .Imm(0), ) .StoreArg(, "c") .Free() .Comment("shift remaining words") .Start(, ...) 
.Loop(func( [][]Reg, [][]Reg) {// We reuse the input registers as output, delayed one cycle; prev is the first output. // After writing the outputs to memory, we can copy the final x value into prev // for the next iteration. := for , := range [0] { (, , , ) [0][] = = } .StoreN() .Mov(, ) }) .Comment("store final shifted bits") (, , ) } else {// Construct values from x << s and x >> (64-s). // After the first word has been processed, the invariant is that // prev holds x << s, to be used as the high bits of the next output word, // once we find the low bits after reading the next input word. // After the loop finishes, prev holds the final output word to be written. := .Reg() .Mov(.Imm(.Arch.WordBits), ) .Sub(, , , SmashCarry) := .Reg() (, , ) (, , ) .StoreArg(, "c") .Free() .Comment("shift remaining words") .Start(, ...) .Loop(func(, [][]Reg) {if .HasRegShift() {// ARM (32-bit) allows shifts in most arithmetic expressions, // including OR, letting us combine the negShift and a.Or. // The simplest way to manage the registers is to do StoreN for // one output at a time, and since we don't use multi-register // stores on ARM, that doesn't hurt us. [0] = [0][:1]for , := range [0] { .Or((, ), , ) [0][0] = .StoreN() (, , ) }return }// We reuse the input registers as output, delayed one cycle; z0 is the first output. := .Reg() := for , := range [0] { (, , ) .Or(, , ) (, , ) [0][] = = } .StoreN() }) .Comment("store final shifted bits") } .StoreN([][]Reg{{}}) .Done() .Free() .Ret()// Return 0, used from above. .Label("ret0") .StoreArg(.Imm(0), "c") .Ret()}
The pages are generated with Golds v0.7.9-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.