Source File
natconv.go
Belonging Package
math/big
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements nat-to-string conversion functions.
package big
import (
)
const digits = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
// Note: MaxBase = len(digits), but it must remain an untyped rune constant
// for API compatibility.
// MaxBase is the largest number base accepted for string conversions.
const MaxBase = 10 + ('z' - 'a' + 1) + ('Z' - 'A' + 1)
const maxBaseSmall = 10 + ('z' - 'a' + 1)
// maxPow returns (b**n, n) such that b**n is the largest power b**n <= _M.
// For instance maxPow(10) == (1e19, 19) for 19 decimal digits in a 64bit Word.
// In other words, at most n digits in base b fit into a Word.
// TODO(gri) replace this with a table, generated at build time.
func maxPow( Word) ( Word, int) {
, = , 1 // assuming b <= _M
for := _M / ; <= ; {
// p == b**n && p <= max
*=
++
}
// p == b**n && p <= _M
return
}
// pow returns x**n for n > 0, and 1 otherwise.
func pow( Word, int) ( Word) {
// n == sum of bi * 2**i, for 0 <= i < imax, and bi is 0 or 1
// thus x**n == product of x**(2**i) for all i where bi == 1
// (Russian Peasant Method for exponentiation)
= 1
for > 0 {
if &1 != 0 {
*=
}
*=
>>= 1
}
return
}
// scan errors
var (
errNoDigits = errors.New("number has no digits")
errInvalSep = errors.New("'_' must separate successive digits")
)
// scan scans the number corresponding to the longest possible prefix
// from r representing an unsigned number in a given conversion base.
// scan returns the corresponding natural number res, the actual base b,
// a digit count, and a read or syntax error err, if any.
//
// For base 0, an underscore character “_” may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number, or the returned
// digit count. Incorrect placement of underscores is reported as an
// error if there are no other errors. If base != 0, underscores are
// not recognized and thus terminate scanning like any other character
// that is not a valid radix point or digit.
//
// number = mantissa | prefix pmantissa .
// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
// mantissa = digits "." [ digits ] | digits | "." digits .
// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
// digits = digit { [ "_" ] digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
//
// Unless fracOk is set, the base argument must be 0 or a value between
// 2 and MaxBase. If fracOk is set, the base argument must be one of
// 0, 2, 8, 10, or 16. Providing an invalid base argument leads to a run-
// time panic.
//
// For base 0, the number prefix determines the actual base: A prefix of
// “0b” or “0B” selects base 2, “0o” or “0O” selects base 8, and
// “0x” or “0X” selects base 16. If fracOk is false, a “0” prefix
// (immediately followed by digits) selects base 8 as well. Otherwise,
// the selected base is 10 and no prefix is accepted.
//
// If fracOk is set, a period followed by a fractional part is permitted.
// The result value is computed as if there were no period present; and
// the count value is used to determine the fractional part.
//
// For bases <= 36, lower and upper case letters are considered the same:
// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
// For bases > 36, the upper case letters 'A' to 'Z' represent the digit
// values 36 to 61.
//
// A result digit count > 0 corresponds to the number of (non-prefix) digits
// parsed. A digit count <= 0 indicates the presence of a period (if fracOk
// is set, only), and -count is the number of fractional digits found.
// In this case, the actual value of the scanned number is res * b**count.
func ( nat) ( io.ByteScanner, int, bool) ( nat, , int, error) {
// reject invalid bases
:= == 0 ||
! && 2 <= && <= MaxBase ||
&& ( == 2 || == 8 || == 10 || == 16)
if ! {
panic(fmt.Sprintf("invalid number base %d", ))
}
// prev encodes the previously seen char: it is one
// of '_', '0' (a digit), or '.' (anything else). A
// valid separator '_' may only occur after a digit
// and if base == 0.
:= '.'
:= false
// one char look-ahead
, := .ReadByte()
// determine actual base
, := , 0
if == 0 {
// actual base is 10 unless there's a base prefix
= 10
if == nil && == '0' {
= '0'
= 1
, = .ReadByte()
if == nil {
// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
switch {
case 'b', 'B':
, = 2, 'b'
case 'o', 'O':
, = 8, 'o'
case 'x', 'X':
, = 16, 'x'
default:
if ! {
, = 8, '0'
}
}
if != 0 {
= 0 // prefix is not counted
if != '0' {
, = .ReadByte()
}
}
}
}
}
// convert string
// Algorithm: Collect digits in groups of at most n digits in di
// and then use mulAddWW for every such group to add them to the
// result.
= [:0]
:= Word()
, := maxPow() // at most n digits in base b1 fit into Word
:= Word(0) // 0 <= di < b1**i < bn
:= 0 // 0 <= i < n
:= -1 // position of decimal point
for == nil {
if == '.' && {
= false
if == '_' {
= true
}
= '.'
=
} else if == '_' && == 0 {
if != '0' {
= true
}
= '_'
} else {
// convert rune into digit value d1
var Word
switch {
case '0' <= && <= '9':
= Word( - '0')
case 'a' <= && <= 'z':
= Word( - 'a' + 10)
case 'A' <= && <= 'Z':
if <= maxBaseSmall {
= Word( - 'A' + 10)
} else {
= Word( - 'A' + maxBaseSmall)
}
default:
= MaxBase + 1
}
if >= {
.UnreadByte() // ch does not belong to number anymore
break
}
= '0'
++
// collect d1 in di
= * +
++
// if di is "full", add it to the result
if == {
= .mulAddWW(, , )
= 0
= 0
}
}
, = .ReadByte()
}
if == io.EOF {
= nil
}
// other errors take precedence over invalid separators
if == nil && ( || == '_') {
= errInvalSep
}
if == 0 {
// no digits found
if == '0' {
// there was only the octal prefix 0 (possibly followed by separators and digits > 7);
// interpret as decimal 0
return [:0], 10, 1,
}
= errNoDigits // fall through; result will be 0
}
// add remaining digits to result
if > 0 {
= .mulAddWW(, pow(, ), )
}
= .norm()
// adjust count for fraction, if any
if >= 0 {
// 0 <= dp <= count
= -
}
return
}
// utoa converts x to an ASCII representation in the given base;
// base must be between 2 and MaxBase, inclusive.
func ( nat) ( int) []byte {
return .itoa(false, )
}
// itoa is like utoa but it prepends a '-' if neg && x != 0.
func ( nat) ( bool, int) []byte {
if < 2 || > MaxBase {
panic("invalid base")
}
// x == 0
if len() == 0 {
return []byte("0")
}
// len(x) > 0
// allocate buffer for conversion
:= int(float64(.bitLen())/math.Log2(float64())) + 1 // off by 1 at most
if {
++
}
:= make([]byte, )
// convert power of two and non power of two bases separately
if := Word(); == &- {
// shift is base b digit size in bits
:= uint(bits.TrailingZeros(uint())) // shift > 0 because b >= 2
:= Word(1<< - 1)
:= [0] // current word
:= uint(_W) // number of unprocessed bits in w
// convert less-significant words (include leading zeros)
for := 1; < len(); ++ {
// convert full digits
for >= {
--
[] = digits[&]
>>=
-=
}
// convert any partial leading digit and advance to next word
if == 0 {
// no partial digit remaining, just advance
= []
= _W
} else {
// partial digit in current word w (== x[k-1]) and next word x[k]
|= [] <<
--
[] = digits[&]
// advance
= [] >> ( - )
= _W - ( - )
}
}
// convert digits of most-significant word w (omit leading zeros)
for != 0 {
--
[] = digits[&]
>>=
}
} else {
, := maxPow()
// construct table of successive squares of bb*leafSize to use in subdivisions
// result (table != nil) <=> (len(x) > leafSize > 0)
:= divisors(len(), , , )
// preserve x, create local copy for use by convertWords
:= nat(nil).set()
// convert q to string s in base b
.convertWords(, , , , )
// strip leading zeros
// (x != 0; thus s must contain at least one non-zero digit
// and the loop will terminate)
= 0
for [] == '0' {
++
}
}
if {
--
[] = '-'
}
return [:]
}
// Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
// repeated nat/Word division.
//
// The iterative method processes n Words by n divW() calls, each of which visits every Word in the
// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
// Recursive conversion divides q by its approximate square root, yielding two parts, each half
// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
// is made better by splitting the subblocks recursively. Best is to split blocks until one more
// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
// specific hardware.
func ( nat) ( []byte, Word, int, Word, []divisor) {
// split larger blocks recursively
if != nil {
// len(q) > leafSize > 0
var nat
:= len() - 1
for len() > leafSize {
// find divisor close to sqrt(q) if possible, but in any case < q
:= .bitLen() // ~= log2 q, or at of least largest possible q of this bit length
:= >> 1 // ~= log2 sqrt(q)
for > 0 && [-1].nbits > {
-- // desired
}
if [].nbits >= && [].bbb.cmp() >= 0 {
--
if < 0 {
panic("internal inconsistency")
}
}
// split q into the two digit number (q'*bbb + r) to form independent subblocks
, = .div(, , [].bbb)
// convert subblocks and collect results in s[:h] and s[h:]
:= len() - [].ndigits
.([:], , , , [0:])
= [:] // == q.convertWords(s, b, ndigits, bb, table[0:index+1])
}
}
// having split any large blocks now process the remaining (small) block iteratively
:= len()
var Word
if == 10 {
// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
for len() > 0 {
// extract least significant, base bb "digit"
, = .divW(, )
for := 0; < && > 0; ++ {
--
// avoid % computation since r%10 == r - int(r/10)*10;
// this appears to be faster for BenchmarkString10000Base10
// and smaller strings (but a bit slower for larger ones)
:= / 10
[] = '0' + byte(-*10)
=
}
}
} else {
for len() > 0 {
// extract least significant, base bb "digit"
, = .divW(, )
for := 0; < && > 0; ++ {
--
[] = digits[%]
/=
}
}
}
// prepend high-order zeros
for > 0 { // while need more leading zeros
--
[] = '0'
}
}
// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
// Benchmark and configure leafSize using: go test -bench="Leaf"
//
// 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
// 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
type divisor struct {
bbb nat // divisor
nbits int // bit length of divisor (discounting leading zeros) ~= log2(bbb)
ndigits int // digit length of divisor in terms of output base digits
}
var cacheBase10 struct {
sync.Mutex
table [64]divisor // cached divisors for base 10
}
// expWW computes x**y
func ( nat) (, Word) nat {
return .expNN(nat(nil).setWord(), nat(nil).setWord(), nil, false)
}
// construct table of powers of bb*leafSize to use in subdivisions.
func divisors( int, Word, int, Word) []divisor {
// only compute table when recursive conversion is enabled and x is large
if leafSize == 0 || <= leafSize {
return nil
}
// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
:= 1
for := leafSize; < >>1 && < len(cacheBase10.table); <<= 1 {
++
}
// reuse and extend existing table of divisors or create new table as appropriate
var []divisor // for b == 10, table overlaps with cacheBase10.table
if == 10 {
cacheBase10.Lock()
= cacheBase10.table[0:] // reuse old table for this conversion
} else {
= make([]divisor, ) // create new table for this conversion
}
// extend table
if [-1].ndigits == 0 {
// add new entries as needed
var nat
for := 0; < ; ++ {
if [].ndigits == 0 {
if == 0 {
[0].bbb = nat(nil).expWW(, Word(leafSize))
[0].ndigits = * leafSize
} else {
[].bbb = nat(nil).sqr([-1].bbb)
[].ndigits = 2 * [-1].ndigits
}
// optimization: exploit aggregated extra bits in macro blocks
= nat(nil).set([].bbb)
for mulAddVWW(, , , 0) == 0 {
[].bbb = [].bbb.set()
[].ndigits++
}
[].nbits = [].bbb.bitLen()
}
}
}
if == 10 {
cacheBase10.Unlock()
}
return
}
The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |