// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file implements multi-precision floating-point numbers.
// Like in the GNU MPFR library (https://www.mpfr.org/), operands
// can be of mixed precision. Unlike MPFR, the rounding mode is
// not specified with each operation, but with each operand. The
// rounding mode of the result operand determines the rounding
// mode of an operation. This is a from-scratch implementation.

package big

import (
	
	
	
)

const debugFloat = false // enable for debugging

// A nonzero finite Float represents a multi-precision floating point number
//
//	sign × mantissa × 2**exponent
//
// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
// All Floats are ordered, and the ordering of two Floats x and y
// is defined by x.Cmp(y).
//
// Each Float value also has a precision, rounding mode, and accuracy.
// The precision is the maximum number of mantissa bits available to
// represent the value. The rounding mode specifies how a result should
// be rounded to fit into the mantissa bits, and accuracy describes the
// rounding error with respect to the exact result.
//
// Unless specified otherwise, all operations (including setters) that
// specify a *Float variable for the result (usually via the receiver
// with the exception of [Float.MantExp]), round the numeric result according
// to the precision and rounding mode of the result variable.
//
// If the provided result precision is 0 (see below), it is set to the
// precision of the argument with the largest precision value before any
// rounding takes place, and the rounding mode remains unchanged. Thus,
// uninitialized Floats provided as result arguments will have their
// precision set to a reasonable value determined by the operands, and
// their mode is the zero value for RoundingMode (ToNearestEven).
//
// By setting the desired precision to 24 or 53 and using matching rounding
// mode (typically [ToNearestEven]), Float operations produce the same results
// as the corresponding float32 or float64 IEEE 754 arithmetic for operands
// that correspond to normal (i.e., not denormal) float32 or float64 numbers.
// Exponent underflow and overflow lead to a 0 or an Infinity for different
// values than IEEE 754 because Float exponents have a much larger range.
//
// The zero (uninitialized) value for a Float is ready to use and represents
// the number +0.0 exactly, with precision 0 and rounding mode [ToNearestEven].
//
// Operations always take pointer arguments (*Float) rather
// than Float values, and each unique Float value requires
// its own unique *Float pointer. To "copy" a Float value,
// an existing (or newly allocated) Float must be set to
// a new value using the [Float.Set] method; shallow copies
// of Floats are not supported and may lead to errors.
type Float struct {
	prec uint32
	mode RoundingMode
	acc  Accuracy
	form form
	neg  bool
	mant nat
	exp  int32
}

// An ErrNaN panic is raised by a [Float] operation that would lead to
// a NaN under IEEE 754 rules. An ErrNaN implements the error interface.
type ErrNaN struct {
	msg string
}

func ( ErrNaN) () string {
	return .msg
}

// NewFloat allocates and returns a new [Float] set to x,
// with precision 53 and rounding mode [ToNearestEven].
// NewFloat panics with [ErrNaN] if x is a NaN.
func ( float64) *Float {
	if math.IsNaN() {
		panic(ErrNaN{"NewFloat(NaN)"})
	}
	return new(Float).SetFloat64()
}

// Exponent and precision limits.
const (
	MaxExp  = math.MaxInt32  // largest supported exponent
	MinExp  = math.MinInt32  // smallest supported exponent
	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
)

// Internal representation: The mantissa bits x.mant of a nonzero finite
// Float x are stored in a nat slice long enough to hold up to x.prec bits;
// the slice may (but doesn't have to) be shorter if the mantissa contains
// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
// the msb is shifted all the way "to the left"). Thus, if the mantissa has
// trailing 0 bits or x.prec is not a multiple of the Word size _W,
// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
// to the value 0.5; the exponent x.exp shifts the binary point as needed.
//
// A zero or non-finite Float x ignores x.mant and x.exp.
//
// x                 form      neg      mant         exp
// ----------------------------------------------------------
// ±0                zero      sign     -            -
// 0 < |x| < +Inf    finite    sign     mantissa     exponent
// ±Inf              inf       sign     -            -

// A form value describes the internal representation.
type form byte

// The form value order is relevant - do not change!
const (
	zero form = iota
	finite
	inf
)

// RoundingMode determines how a [Float] value is rounded to the
// desired precision. Rounding may change the [Float] value; the
// rounding error is described by the [Float]'s [Accuracy].
type RoundingMode byte

// These constants define supported rounding modes.
const (
	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
	ToZero                            // == IEEE 754-2008 roundTowardZero
	AwayFromZero                      // no IEEE 754-2008 equivalent
	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
)

//go:generate stringer -type=RoundingMode

// Accuracy describes the rounding error produced by the most recent
// operation that generated a [Float] value, relative to the exact value.
type Accuracy int8

// Constants describing the [Accuracy] of a [Float].
const (
	Below Accuracy = -1
	Exact Accuracy = 0
	Above Accuracy = +1
)

//go:generate stringer -type=Accuracy

// SetPrec sets z's precision to prec and returns the (possibly) rounded
// value of z. Rounding occurs according to z's rounding mode if the mantissa
// cannot be represented in prec bits without loss of precision.
// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
// If prec > [MaxPrec], it is set to [MaxPrec].
func ( *Float) ( uint) *Float {
	.acc = Exact // optimistically assume no rounding is needed

	// special case
	if  == 0 {
		.prec = 0
		if .form == finite {
			// truncate z to 0
			.acc = makeAcc(.neg)
			.form = zero
		}
		return 
	}

	// general case
	if  > MaxPrec {
		 = MaxPrec
	}
	 := .prec
	.prec = uint32()
	if .prec <  {
		.round(0)
	}
	return 
}

func makeAcc( bool) Accuracy {
	if  {
		return Above
	}
	return Below
}

// SetMode sets z's rounding mode to mode and returns an exact z.
// z remains unchanged otherwise.
// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to [Exact].
func ( *Float) ( RoundingMode) *Float {
	.mode = 
	.acc = Exact
	return 
}

// Prec returns the mantissa precision of x in bits.
// The result may be 0 for |x| == 0 and |x| == Inf.
func ( *Float) () uint {
	return uint(.prec)
}

// MinPrec returns the minimum precision required to represent x exactly
// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
// The result is 0 for |x| == 0 and |x| == Inf.
func ( *Float) () uint {
	if .form != finite {
		return 0
	}
	return uint(len(.mant))*_W - .mant.trailingZeroBits()
}

// Mode returns the rounding mode of x.
func ( *Float) () RoundingMode {
	return .mode
}

// Acc returns the accuracy of x produced by the most recent
// operation, unless explicitly documented otherwise by that
// operation.
func ( *Float) () Accuracy {
	return .acc
}

// Sign returns:
//   - -1 if x < 0;
//   - 0 if x is ±0;
//   - +1 if x > 0.
func ( *Float) () int {
	if debugFloat {
		.validate()
	}
	if .form == zero {
		return 0
	}
	if .neg {
		return -1
	}
	return 1
}

// MantExp breaks x into its mantissa and exponent components
// and returns the exponent. If a non-nil mant argument is
// provided its value is set to the mantissa of x, with the
// same precision and rounding mode as x. The components
// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
// Calling MantExp with a nil argument is an efficient way to
// get the exponent of the receiver.
//
// Special cases are:
//
//	(  ±0).MantExp(mant) = 0, with mant set to   ±0
//	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
//
// x and mant may be the same in which case x is set to its
// mantissa value.
func ( *Float) ( *Float) ( int) {
	if debugFloat {
		.validate()
	}
	if .form == finite {
		 = int(.exp)
	}
	if  != nil {
		.Copy()
		if .form == finite {
			.exp = 0
		}
	}
	return
}

func ( *Float) ( int64,  uint) {
	if  < MinExp {
		// underflow
		.acc = makeAcc(.neg)
		.form = zero
		return
	}

	if  > MaxExp {
		// overflow
		.acc = makeAcc(!.neg)
		.form = inf
		return
	}

	.form = finite
	.exp = int32()
	.round()
}

// SetMantExp sets z to mant × 2**exp and returns z.
// The result z has the same precision and rounding mode
// as mant. SetMantExp is an inverse of [Float.MantExp] but does
// not require 0.5 <= |mant| < 1.0. Specifically, for a
// given x of type *[Float], SetMantExp relates to [Float.MantExp]
// as follows:
//
//	mant := new(Float)
//	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
//
// Special cases are:
//
//	z.SetMantExp(  ±0, exp) =   ±0
//	z.SetMantExp(±Inf, exp) = ±Inf
//
// z and mant may be the same in which case z's exponent
// is set to exp.
func ( *Float) ( *Float,  int) *Float {
	if debugFloat {
		.validate()
		.validate()
	}
	.Copy()

	if .form == finite {
		// 0 < |mant| < +Inf
		.setExpAndRound(int64(.exp)+int64(), 0)
	}
	return 
}

// Signbit reports whether x is negative or negative zero.
func ( *Float) () bool {
	return .neg
}

// IsInf reports whether x is +Inf or -Inf.
func ( *Float) () bool {
	return .form == inf
}

// IsInt reports whether x is an integer.
// ±Inf values are not integers.
func ( *Float) () bool {
	if debugFloat {
		.validate()
	}
	// special cases
	if .form != finite {
		return .form == zero
	}
	// x.form == finite
	if .exp <= 0 {
		return false
	}
	// x.exp > 0
	return .prec <= uint32(.exp) || .MinPrec() <= uint(.exp) // not enough bits for fractional mantissa
}

// debugging support
func ( *Float) () {
	if !debugFloat {
		// avoid performance bugs
		panic("validate called but debugFloat is not set")
	}
	if  := .validate0();  != "" {
		panic()
	}
}

func ( *Float) () string {
	if .form != finite {
		return ""
	}
	 := len(.mant)
	if  == 0 {
		return "nonzero finite number with empty mantissa"
	}
	const  = 1 << (_W - 1)
	if .mant[-1]& == 0 {
		return fmt.Sprintf("msb not set in last word %#x of %s", .mant[-1], .Text('p', 0))
	}
	if .prec == 0 {
		return "zero precision finite number"
	}
	return ""
}

// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
// have before calling round. z's mantissa must be normalized (with the msb set)
// or empty.
//
// CAUTION: The rounding modes [ToNegativeInf], [ToPositiveInf] are affected by the
// sign of z. For correct rounding, the sign of z must be set correctly before
// calling round.
func ( *Float) ( uint) {
	if debugFloat {
		.validate()
	}

	.acc = Exact
	if .form != finite {
		// ±0 or ±Inf => nothing left to do
		return
	}
	// z.form == finite && len(z.mant) > 0
	// m > 0 implies z.prec > 0 (checked by validate)

	 := uint32(len(.mant)) // present mantissa length in words
	 :=  * _W           // present mantissa bits; bits > 0
	if  <= .prec {
		// mantissa fits => nothing to do
		return
	}
	// bits > z.prec

	// Rounding is based on two bits: the rounding bit (rbit) and the
	// sticky bit (sbit). The rbit is the bit immediately before the
	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
	//
	//   rbit  sbit  => "fractional part"
	//
	//   0     0        == 0
	//   0     1        >  0  , < 0.5
	//   1     0        == 0.5
	//   1     1        >  0.5, < 1.0

	// bits > z.prec: mantissa too large => round
	 := uint( - .prec - 1) // rounding bit position; r >= 0
	 := .mant.bit() & 1    // rounding bit; be safe and ensure it's a single bit
	// The sticky bit is only needed for rounding ToNearestEven
	// or when the rounding bit is zero. Avoid computation otherwise.
	if  == 0 && ( == 0 || .mode == ToNearestEven) {
		 = .mant.sticky()
	}
	 &= 1 // be safe and ensure it's a single bit

	// cut off extra words
	 := (.prec + (_W - 1)) / _W // mantissa length in words for desired precision
	if  >  {
		copy(.mant, .mant[-:]) // move n last words to front
		.mant = .mant[:]
	}

	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
	 := *_W - .prec // 0 <= ntz < _W
	 := Word(1) << 

	// round if result is inexact
	if | != 0 {
		// Make rounding decision: The result mantissa is truncated ("rounded down")
		// by default. Decide if we need to increment, or "round up", the (unsigned)
		// mantissa.
		 := false
		switch .mode {
		case ToNegativeInf:
			 = .neg
		case ToZero:
			// nothing to do
		case ToNearestEven:
			 =  != 0 && ( != 0 || .mant[0]& != 0)
		case ToNearestAway:
			 =  != 0
		case AwayFromZero:
			 = true
		case ToPositiveInf:
			 = !.neg
		default:
			panic("unreachable")
		}

		// A positive result (!z.neg) is Above the exact result if we increment,
		// and it's Below if we truncate (Exact results require no rounding).
		// For a negative result (z.neg) it is exactly the opposite.
		.acc = makeAcc( != .neg)

		if  {
			// add 1 to mantissa
			if addVW(.mant, .mant, ) != 0 {
				// mantissa overflow => adjust exponent
				if .exp >= MaxExp {
					// exponent overflow
					.form = inf
					return
				}
				.exp++
				// adjust mantissa: divide by 2 to compensate for exponent adjustment
				shrVU(.mant, .mant, 1)
				// set msb == carry == 1 from the mantissa overflow above
				const  = 1 << (_W - 1)
				.mant[-1] |= 
			}
		}
	}

	// zero out trailing bits in least-significant word
	.mant[0] &^=  - 1

	if debugFloat {
		.validate()
	}
}

func ( *Float) ( bool,  uint64) *Float {
	if .prec == 0 {
		.prec = 64
	}
	.acc = Exact
	.neg = 
	if  == 0 {
		.form = zero
		return 
	}
	// x != 0
	.form = finite
	 := bits.LeadingZeros64()
	.mant = .mant.setUint64( << uint())
	.exp = int32(64 - ) // always fits
	if .prec < 64 {
		.round(0)
	}
	return 
}

// SetUint64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func ( *Float) ( uint64) *Float {
	return .setBits64(false, )
}

// SetInt64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func ( *Float) ( int64) *Float {
	 := 
	if  < 0 {
		 = -
	}
	// We cannot simply call z.SetUint64(uint64(u)) and change
	// the sign afterwards because the sign affects rounding.
	return .setBits64( < 0, uint64())
}

// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 53 (and rounding will have
// no effect). SetFloat64 panics with [ErrNaN] if x is a NaN.
func ( *Float) ( float64) *Float {
	if .prec == 0 {
		.prec = 53
	}
	if math.IsNaN() {
		panic(ErrNaN{"Float.SetFloat64(NaN)"})
	}
	.acc = Exact
	.neg = math.Signbit() // handle -0, -Inf correctly
	if  == 0 {
		.form = zero
		return 
	}
	if math.IsInf(, 0) {
		.form = inf
		return 
	}
	// normalized x != 0
	.form = finite
	,  := math.Frexp() // get normalized mantissa
	.mant = .mant.setUint64(1<<63 | math.Float64bits()<<11)
	.exp = int32() // always fits
	if .prec < 53 {
		.round(0)
	}
	return 
}

// fnorm normalizes mantissa m by shifting it to the left
// such that the msb of the most-significant word (msw) is 1.
// It returns the shift amount. It assumes that len(m) != 0.
func fnorm( nat) int64 {
	if debugFloat && (len() == 0 || [len()-1] == 0) {
		panic("msw of mantissa is 0")
	}
	 := nlz([len()-1])
	if  > 0 {
		 := shlVU(, , )
		if debugFloat &&  != 0 {
			panic("nlz or shlVU incorrect")
		}
	}
	return int64()
}

// SetInt sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to the larger of x.BitLen()
// or 64 (and rounding will have no effect).
func ( *Float) ( *Int) *Float {
	// TODO(gri) can be more efficient if z.prec > 0
	// but small compared to the size of x, or if there
	// are many trailing 0's.
	 := uint32(.BitLen())
	if .prec == 0 {
		.prec = umax32(, 64)
	}
	.acc = Exact
	.neg = .neg
	if len(.abs) == 0 {
		.form = zero
		return 
	}
	// x != 0
	.mant = .mant.set(.abs)
	fnorm(.mant)
	.setExpAndRound(int64(), 0)
	return 
}

// SetRat sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to the largest of a.BitLen(),
// b.BitLen(), or 64; with x = a/b.
func ( *Float) ( *Rat) *Float {
	if .IsInt() {
		return .SetInt(.Num())
	}
	var ,  Float
	.SetInt(.Num())
	.SetInt(.Denom())
	if .prec == 0 {
		.prec = umax32(.prec, .prec)
	}
	return .Quo(&, &)
}

// SetInf sets z to the infinite Float -Inf if signbit is
// set, or +Inf if signbit is not set, and returns z. The
// precision of z is unchanged and the result is always
// [Exact].
func ( *Float) ( bool) *Float {
	.acc = Exact
	.form = inf
	.neg = 
	return 
}

// Set sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to the precision of x
// before setting z (and rounding will have no effect).
// Rounding is performed according to z's precision and rounding
// mode; and z's accuracy reports the result error relative to the
// exact (not rounded) result.
func ( *Float) ( *Float) *Float {
	if debugFloat {
		.validate()
	}
	.acc = Exact
	if  !=  {
		.form = .form
		.neg = .neg
		if .form == finite {
			.exp = .exp
			.mant = .mant.set(.mant)
		}
		if .prec == 0 {
			.prec = .prec
		} else if .prec < .prec {
			.round(0)
		}
	}
	return 
}

// Copy sets z to x, with the same precision, rounding mode, and accuracy as x.
// Copy returns z. If x and z are identical, Copy is a no-op.
func ( *Float) ( *Float) *Float {
	if debugFloat {
		.validate()
	}
	if  !=  {
		.prec = .prec
		.mode = .mode
		.acc = .acc
		.form = .form
		.neg = .neg
		if .form == finite {
			.mant = .mant.set(.mant)
			.exp = .exp
		}
	}
	return 
}

// msb32 returns the 32 most significant bits of x.
func msb32( nat) uint32 {
	 := len() - 1
	if  < 0 {
		return 0
	}
	if debugFloat && []&(1<<(_W-1)) == 0 {
		panic("x not normalized")
	}
	switch _W {
	case 32:
		return uint32([])
	case 64:
		return uint32([] >> 32)
	}
	panic("unreachable")
}

// msb64 returns the 64 most significant bits of x.
func msb64( nat) uint64 {
	 := len() - 1
	if  < 0 {
		return 0
	}
	if debugFloat && []&(1<<(_W-1)) == 0 {
		panic("x not normalized")
	}
	switch _W {
	case 32:
		 := uint64([]) << 32
		if  > 0 {
			 |= uint64([-1])
		}
		return 
	case 64:
		return uint64([])
	}
	panic("unreachable")
}

// Uint64 returns the unsigned integer resulting from truncating x
// towards zero. If 0 <= x <= [math.MaxUint64], the result is [Exact]
// if x is an integer and [Below] otherwise.
// The result is (0, [Above]) for x < 0, and ([math.MaxUint64], [Below])
// for x > [math.MaxUint64].
func ( *Float) () (uint64, Accuracy) {
	if debugFloat {
		.validate()
	}

	switch .form {
	case finite:
		if .neg {
			return 0, Above
		}
		// 0 < x < +Inf
		if .exp <= 0 {
			// 0 < x < 1
			return 0, Below
		}
		// 1 <= x < Inf
		if .exp <= 64 {
			// u = trunc(x) fits into a uint64
			 := msb64(.mant) >> (64 - uint32(.exp))
			if .MinPrec() <= 64 {
				return , Exact
			}
			return , Below // x truncated
		}
		// x too large
		return math.MaxUint64, Below

	case zero:
		return 0, Exact

	case inf:
		if .neg {
			return 0, Above
		}
		return math.MaxUint64, Below
	}

	panic("unreachable")
}

// Int64 returns the integer resulting from truncating x towards zero.
// If [math.MinInt64] <= x <= [math.MaxInt64], the result is [Exact] if x is
// an integer, and [Above] (x < 0) or [Below] (x > 0) otherwise.
// The result is ([math.MinInt64], [Above]) for x < [math.MinInt64],
// and ([math.MaxInt64], [Below]) for x > [math.MaxInt64].
func ( *Float) () (int64, Accuracy) {
	if debugFloat {
		.validate()
	}

	switch .form {
	case finite:
		// 0 < |x| < +Inf
		 := makeAcc(.neg)
		if .exp <= 0 {
			// 0 < |x| < 1
			return 0, 
		}
		// x.exp > 0

		// 1 <= |x| < +Inf
		if .exp <= 63 {
			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
			 := int64(msb64(.mant) >> (64 - uint32(.exp)))
			if .neg {
				 = -
			}
			if .MinPrec() <= uint(.exp) {
				return , Exact
			}
			return ,  // x truncated
		}
		if .neg {
			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
			if .exp == 64 && .MinPrec() == 1 {
				 = Exact
			}
			return math.MinInt64, 
		}
		// x too large
		return math.MaxInt64, Below

	case zero:
		return 0, Exact

	case inf:
		if .neg {
			return math.MinInt64, Above
		}
		return math.MaxInt64, Below
	}

	panic("unreachable")
}

// Float32 returns the float32 value nearest to x. If x is too small to be
// represented by a float32 (|x| < [math.SmallestNonzeroFloat32]), the result
// is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x.
// If x is too large to be represented by a float32 (|x| > [math.MaxFloat32]),
// the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x.
func ( *Float) () (float32, Accuracy) {
	if debugFloat {
		.validate()
	}

	switch .form {
	case finite:
		// 0 < |x| < +Inf

		const (
			 = 32                //        float size
			 = 23                //        mantissa size (excluding implicit msb)
			 =  -  - 1 //     8  exponent size
			  = 1<<(-1) - 1  //   127  exponent bias
			  = 1 -  -   //  -149  smallest unbiased exponent (denormal)
			  = 1 -           //  -126  smallest unbiased exponent (normal)
			  =               //   127  largest unbiased exponent (normal)
		)

		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
		 := .exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0

		// Compute precision p for float32 mantissa.
		// If the exponent is too small, we have a denormal number before
		// rounding and fewer than p mantissa bits of precision available
		// (the exponent remains fixed but the mantissa gets shifted right).
		 :=  + 1 // precision of normal float
		if  <  {
			// recompute precision
			 =  + 1 -  + int()
			// If p == 0, the mantissa of x is shifted so much to the right
			// that its msb falls immediately to the right of the float32
			// mantissa space. In other words, if the smallest denormal is
			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
			// If m == 0.5, it is rounded down to even, i.e., 0.0.
			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
			if  < 0 /* m <= 0.25 */ ||  == 0 && .mant.sticky(uint(len(.mant))*_W-1) == 0 /* m == 0.5 */ {
				// underflow to ±0
				if .neg {
					var  float32
					return -, Above
				}
				return 0.0, Below
			}
			// otherwise, round up
			// We handle p == 0 explicitly because it's easy and because
			// Float.round doesn't support rounding to 0 bits of precision.
			if  == 0 {
				if .neg {
					return -math.SmallestNonzeroFloat32, Below
				}
				return math.SmallestNonzeroFloat32, Above
			}
		}
		// p > 0

		// round
		var  Float
		.prec = uint32()
		.Set()
		 = .exp - 1

		// Rounding may have caused r to overflow to ±Inf
		// (rounding never causes underflows to 0).
		// If the exponent is too large, also overflow to ±Inf.
		if .form == inf ||  >  {
			// overflow
			if .neg {
				return float32(math.Inf(-1)), Below
			}
			return float32(math.Inf(+1)), Above
		}
		// e <= emax

		// Determine sign, biased exponent, and mantissa.
		var , ,  uint32
		if .neg {
			 = 1 << ( - 1)
		}

		// Rounding may have caused a denormal number to
		// become normal. Check again.
		if  <  {
			// denormal number: recompute precision
			// Since rounding may have at best increased precision
			// and we have eliminated p <= 0 early, we know p > 0.
			// bexp == 0 for denormals
			 =  + 1 -  + int()
			 = msb32(.mant) >> uint(-)
		} else {
			// normal number: emin <= e <= emax
			 = uint32(+) << 
			 = msb32(.mant) >>  & (1<< - 1) // cut off msb (implicit 1 bit)
		}

		return math.Float32frombits( |  | ), .acc

	case zero:
		if .neg {
			var  float32
			return -, Exact
		}
		return 0.0, Exact

	case inf:
		if .neg {
			return float32(math.Inf(-1)), Exact
		}
		return float32(math.Inf(+1)), Exact
	}

	panic("unreachable")
}

// Float64 returns the float64 value nearest to x. If x is too small to be
// represented by a float64 (|x| < [math.SmallestNonzeroFloat64]), the result
// is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x.
// If x is too large to be represented by a float64 (|x| > [math.MaxFloat64]),
// the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x.
func ( *Float) () (float64, Accuracy) {
	if debugFloat {
		.validate()
	}

	switch .form {
	case finite:
		// 0 < |x| < +Inf

		const (
			 = 64                //        float size
			 = 52                //        mantissa size (excluding implicit msb)
			 =  -  - 1 //    11  exponent size
			  = 1<<(-1) - 1  //  1023  exponent bias
			  = 1 -  -   // -1074  smallest unbiased exponent (denormal)
			  = 1 -           // -1022  smallest unbiased exponent (normal)
			  =               //  1023  largest unbiased exponent (normal)
		)

		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
		 := .exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0

		// Compute precision p for float64 mantissa.
		// If the exponent is too small, we have a denormal number before
		// rounding and fewer than p mantissa bits of precision available
		// (the exponent remains fixed but the mantissa gets shifted right).
		 :=  + 1 // precision of normal float
		if  <  {
			// recompute precision
			 =  + 1 -  + int()
			// If p == 0, the mantissa of x is shifted so much to the right
			// that its msb falls immediately to the right of the float64
			// mantissa space. In other words, if the smallest denormal is
			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
			// If m == 0.5, it is rounded down to even, i.e., 0.0.
			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
			if  < 0 /* m <= 0.25 */ ||  == 0 && .mant.sticky(uint(len(.mant))*_W-1) == 0 /* m == 0.5 */ {
				// underflow to ±0
				if .neg {
					var  float64
					return -, Above
				}
				return 0.0, Below
			}
			// otherwise, round up
			// We handle p == 0 explicitly because it's easy and because
			// Float.round doesn't support rounding to 0 bits of precision.
			if  == 0 {
				if .neg {
					return -math.SmallestNonzeroFloat64, Below
				}
				return math.SmallestNonzeroFloat64, Above
			}
		}
		// p > 0

		// round
		var  Float
		.prec = uint32()
		.Set()
		 = .exp - 1

		// Rounding may have caused r to overflow to ±Inf
		// (rounding never causes underflows to 0).
		// If the exponent is too large, also overflow to ±Inf.
		if .form == inf ||  >  {
			// overflow
			if .neg {
				return math.Inf(-1), Below
			}
			return math.Inf(+1), Above
		}
		// e <= emax

		// Determine sign, biased exponent, and mantissa.
		var , ,  uint64
		if .neg {
			 = 1 << ( - 1)
		}

		// Rounding may have caused a denormal number to
		// become normal. Check again.
		if  <  {
			// denormal number: recompute precision
			// Since rounding may have at best increased precision
			// and we have eliminated p <= 0 early, we know p > 0.
			// bexp == 0 for denormals
			 =  + 1 -  + int()
			 = msb64(.mant) >> uint(-)
		} else {
			// normal number: emin <= e <= emax
			 = uint64(+) << 
			 = msb64(.mant) >>  & (1<< - 1) // cut off msb (implicit 1 bit)
		}

		return math.Float64frombits( |  | ), .acc

	case zero:
		if .neg {
			var  float64
			return -, Exact
		}
		return 0.0, Exact

	case inf:
		if .neg {
			return math.Inf(-1), Exact
		}
		return math.Inf(+1), Exact
	}

	panic("unreachable")
}

// Int returns the result of truncating x towards zero;
// or nil if x is an infinity.
// The result is [Exact] if x.IsInt(); otherwise it is [Below]
// for x > 0, and [Above] for x < 0.
// If a non-nil *[Int] argument z is provided, [Int] stores
// the result in z instead of allocating a new [Int].
func ( *Float) ( *Int) (*Int, Accuracy) {
	if debugFloat {
		.validate()
	}

	if  == nil && .form <= finite {
		 = new(Int)
	}

	switch .form {
	case finite:
		// 0 < |x| < +Inf
		 := makeAcc(.neg)
		if .exp <= 0 {
			// 0 < |x| < 1
			return .SetInt64(0), 
		}
		// x.exp > 0

		// 1 <= |x| < +Inf
		// determine minimum required precision for x
		 := uint(len(.mant)) * _W
		 := uint(.exp)
		if .MinPrec() <=  {
			 = Exact
		}
		// shift mantissa as needed
		if  == nil {
			 = new(Int)
		}
		.neg = .neg
		switch {
		case  > :
			.abs = .abs.shl(.mant, -)
		default:
			.abs = .abs.set(.mant)
		case  < :
			.abs = .abs.shr(.mant, -)
		}
		return , 

	case zero:
		return .SetInt64(0), Exact

	case inf:
		return nil, makeAcc(.neg)
	}

	panic("unreachable")
}

// Rat returns the rational number corresponding to x;
// or nil if x is an infinity.
// The result is [Exact] if x is not an Inf.
// If a non-nil *[Rat] argument z is provided, [Rat] stores
// the result in z instead of allocating a new [Rat].
func ( *Float) ( *Rat) (*Rat, Accuracy) {
	if debugFloat {
		.validate()
	}

	if  == nil && .form <= finite {
		 = new(Rat)
	}

	switch .form {
	case finite:
		// 0 < |x| < +Inf
		 := int32(len(.mant)) * _W
		// build up numerator and denominator
		.a.neg = .neg
		switch {
		case .exp > :
			.a.abs = .a.abs.shl(.mant, uint(.exp-))
			.b.abs = .b.abs[:0] // == 1 (see Rat)
			// z already in normal form
		default:
			.a.abs = .a.abs.set(.mant)
			.b.abs = .b.abs[:0] // == 1 (see Rat)
			// z already in normal form
		case .exp < :
			.a.abs = .a.abs.set(.mant)
			 := .b.abs.setUint64(1)
			.b.abs = .shl(, uint(-.exp))
			.norm()
		}
		return , Exact

	case zero:
		return .SetInt64(0), Exact

	case inf:
		return nil, makeAcc(.neg)
	}

	panic("unreachable")
}

// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
// and returns z.
func ( *Float) ( *Float) *Float {
	.Set()
	.neg = false
	return 
}

// Neg sets z to the (possibly rounded) value of x with its sign negated,
// and returns z.
func ( *Float) ( *Float) *Float {
	.Set()
	.neg = !.neg
	return 
}

func validateBinaryOperands(,  *Float) {
	if !debugFloat {
		// avoid performance bugs
		panic("validateBinaryOperands called but debugFloat is not set")
	}
	if len(.mant) == 0 {
		panic("empty mantissa for x")
	}
	if len(.mant) == 0 {
		panic("empty mantissa for y")
	}
}

// z = x + y, ignoring signs of x and y for the addition
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func ( *Float) (,  *Float) {
	// Note: This implementation requires 2 shifts most of the
	// time. It is also inefficient if exponents or precisions
	// differ by wide margins. The following article describes
	// an efficient (but much more complicated) implementation
	// compatible with the internal representation used here:
	//
	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
	// Point Addition With Exact Rounding (as in the MPFR Library)"
	// http://www.vinc17.net/research/papers/rnc6.pdf

	if debugFloat {
		validateBinaryOperands(, )
	}

	// compute exponents ex, ey for mantissa with "binary point"
	// on the right (mantissa.0) - use int64 to avoid overflow
	 := int64(.exp) - int64(len(.mant))*_W
	 := int64(.exp) - int64(len(.mant))*_W

	 := alias(.mant, .mant) || alias(.mant, .mant)

	// TODO(gri) having a combined add-and-shift primitive
	//           could make this code significantly faster
	switch {
	case  < :
		if  {
			 := nat(nil).shl(.mant, uint(-))
			.mant = .mant.add(.mant, )
		} else {
			.mant = .mant.shl(.mant, uint(-))
			.mant = .mant.add(.mant, .mant)
		}
	default:
		// ex == ey, no shift needed
		.mant = .mant.add(.mant, .mant)
	case  > :
		if  {
			 := nat(nil).shl(.mant, uint(-))
			.mant = .mant.add(, .mant)
		} else {
			.mant = .mant.shl(.mant, uint(-))
			.mant = .mant.add(.mant, .mant)
		}
		 = 
	}
	// len(z.mant) > 0

	.setExpAndRound(+int64(len(.mant))*_W-fnorm(.mant), 0)
}

// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func ( *Float) (,  *Float) {
	// This code is symmetric to uadd.
	// We have not factored the common code out because
	// eventually uadd (and usub) should be optimized
	// by special-casing, and the code will diverge.

	if debugFloat {
		validateBinaryOperands(, )
	}

	 := int64(.exp) - int64(len(.mant))*_W
	 := int64(.exp) - int64(len(.mant))*_W

	 := alias(.mant, .mant) || alias(.mant, .mant)

	switch {
	case  < :
		if  {
			 := nat(nil).shl(.mant, uint(-))
			.mant = .sub(.mant, )
		} else {
			.mant = .mant.shl(.mant, uint(-))
			.mant = .mant.sub(.mant, .mant)
		}
	default:
		// ex == ey, no shift needed
		.mant = .mant.sub(.mant, .mant)
	case  > :
		if  {
			 := nat(nil).shl(.mant, uint(-))
			.mant = .sub(, .mant)
		} else {
			.mant = .mant.shl(.mant, uint(-))
			.mant = .mant.sub(.mant, .mant)
		}
		 = 
	}

	// operands may have canceled each other out
	if len(.mant) == 0 {
		.acc = Exact
		.form = zero
		.neg = false
		return
	}
	// len(z.mant) > 0

	.setExpAndRound(+int64(len(.mant))*_W-fnorm(.mant), 0)
}

// z = x * y, ignoring signs of x and y for the multiplication
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func ( *Float) (,  *Float) {
	if debugFloat {
		validateBinaryOperands(, )
	}

	// Note: This is doing too much work if the precision
	// of z is less than the sum of the precisions of x
	// and y which is often the case (e.g., if all floats
	// have the same precision).
	// TODO(gri) Optimize this for the common case.

	 := int64(.exp) + int64(.exp)
	if  ==  {
		.mant = .mant.sqr(.mant)
	} else {
		.mant = .mant.mul(.mant, .mant)
	}
	.setExpAndRound(-fnorm(.mant), 0)
}

// z = x / y, ignoring signs of x and y for the division
// but using the sign of z for rounding the result.
// x and y must have a non-empty mantissa and valid exponent.
func ( *Float) (,  *Float) {
	if debugFloat {
		validateBinaryOperands(, )
	}

	// mantissa length in words for desired result precision + 1
	// (at least one extra bit so we get the rounding bit after
	// the division)
	 := int(.prec/_W) + 1

	// compute adjusted x.mant such that we get enough result precision
	 := .mant
	if  :=  - len(.mant) + len(.mant);  > 0 {
		// d extra words needed => add d "0 digits" to x
		 = make(nat, len(.mant)+)
		copy([:], .mant)
	}
	// TODO(gri): If we have too many digits (d < 0), we should be able
	// to shorten x for faster division. But we must be extra careful
	// with rounding in that case.

	// Compute d before division since there may be aliasing of x.mant
	// (via xadj) or y.mant with z.mant.
	 := len() - len(.mant)

	// divide
	var  nat
	.mant,  = .mant.div(nil, , .mant)
	 := int64(.exp) - int64(.exp) - int64(-len(.mant))*_W

	// The result is long enough to include (at least) the rounding bit.
	// If there's a non-zero remainder, the corresponding fractional part
	// (if it were computed), would have a non-zero sticky bit (if it were
	// zero, it couldn't have a non-zero remainder).
	var  uint
	if len() > 0 {
		 = 1
	}

	.setExpAndRound(-fnorm(.mant), )
}

// ucmp returns -1, 0, or +1, depending on whether
// |x| < |y|, |x| == |y|, or |x| > |y|.
// x and y must have a non-empty mantissa and valid exponent.
func ( *Float) ( *Float) int {
	if debugFloat {
		validateBinaryOperands(, )
	}

	switch {
	case .exp < .exp:
		return -1
	case .exp > .exp:
		return +1
	}
	// x.exp == y.exp

	// compare mantissas
	 := len(.mant)
	 := len(.mant)
	for  > 0 ||  > 0 {
		var ,  Word
		if  > 0 {
			--
			 = .mant[]
		}
		if  > 0 {
			--
			 = .mant[]
		}
		switch {
		case  < :
			return -1
		case  > :
			return +1
		}
	}

	return 0
}

// Handling of sign bit as defined by IEEE 754-2008, section 6.3:
//
// When neither the inputs nor result are NaN, the sign of a product or
// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
// or of a difference x−y regarded as a sum x+(−y), differs from at most
// one of the addends’ signs; and the sign of the result of conversions,
// the quantize operation, the roundToIntegral operations, and the
// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
// These rules shall apply even when operands or results are zero or infinite.
//
// When the sum of two operands with opposite signs (or the difference of
// two operands with like signs) is exactly zero, the sign of that sum (or
// difference) shall be +0 in all rounding-direction attributes except
// roundTowardNegative; under that attribute, the sign of an exact zero
// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
// sign as x even when x is zero.
//
// See also: https://play.golang.org/p/RtH3UCt5IH

// Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
// it is changed to the larger of x's or y's precision before the operation.
// Rounding is performed according to z's precision and rounding mode; and
// z's accuracy reports the result error relative to the exact (not rounded)
// result. Add panics with [ErrNaN] if x and y are infinities with opposite
// signs. The value of z is undefined in that case.
func ( *Float) (,  *Float) *Float {
	if debugFloat {
		.validate()
		.validate()
	}

	if .prec == 0 {
		.prec = umax32(.prec, .prec)
	}

	if .form == finite && .form == finite {
		// x + y (common case)

		// Below we set z.neg = x.neg, and when z aliases y this will
		// change the y operand's sign. This is fine, because if an
		// operand aliases the receiver it'll be overwritten, but we still
		// want the original x.neg and y.neg values when we evaluate
		// x.neg != y.neg, so we need to save y.neg before setting z.neg.
		 := .neg

		.neg = .neg
		if .neg ==  {
			// x + y == x + y
			// (-x) + (-y) == -(x + y)
			.uadd(, )
		} else {
			// x + (-y) == x - y == -(y - x)
			// (-x) + y == y - x == -(x - y)
			if .ucmp() > 0 {
				.usub(, )
			} else {
				.neg = !.neg
				.usub(, )
			}
		}
		if .form == zero && .mode == ToNegativeInf && .acc == Exact {
			.neg = true
		}
		return 
	}

	if .form == inf && .form == inf && .neg != .neg {
		// +Inf + -Inf
		// -Inf + +Inf
		// value of z is undefined but make sure it's valid
		.acc = Exact
		.form = zero
		.neg = false
		panic(ErrNaN{"addition of infinities with opposite signs"})
	}

	if .form == zero && .form == zero {
		// ±0 + ±0
		.acc = Exact
		.form = zero
		.neg = .neg && .neg // -0 + -0 == -0
		return 
	}

	if .form == inf || .form == zero {
		// ±Inf + y
		// x + ±0
		return .Set()
	}

	// ±0 + y
	// x + ±Inf
	return .Set()
}

// Sub sets z to the rounded difference x-y and returns z.
// Precision, rounding, and accuracy reporting are as for [Float.Add].
// Sub panics with [ErrNaN] if x and y are infinities with equal
// signs. The value of z is undefined in that case.
func ( *Float) (,  *Float) *Float {
	if debugFloat {
		.validate()
		.validate()
	}

	if .prec == 0 {
		.prec = umax32(.prec, .prec)
	}

	if .form == finite && .form == finite {
		// x - y (common case)
		 := .neg
		.neg = .neg
		if .neg !=  {
			// x - (-y) == x + y
			// (-x) - y == -(x + y)
			.uadd(, )
		} else {
			// x - y == x - y == -(y - x)
			// (-x) - (-y) == y - x == -(x - y)
			if .ucmp() > 0 {
				.usub(, )
			} else {
				.neg = !.neg
				.usub(, )
			}
		}
		if .form == zero && .mode == ToNegativeInf && .acc == Exact {
			.neg = true
		}
		return 
	}

	if .form == inf && .form == inf && .neg == .neg {
		// +Inf - +Inf
		// -Inf - -Inf
		// value of z is undefined but make sure it's valid
		.acc = Exact
		.form = zero
		.neg = false
		panic(ErrNaN{"subtraction of infinities with equal signs"})
	}

	if .form == zero && .form == zero {
		// ±0 - ±0
		.acc = Exact
		.form = zero
		.neg = .neg && !.neg // -0 - +0 == -0
		return 
	}

	if .form == inf || .form == zero {
		// ±Inf - y
		// x - ±0
		return .Set()
	}

	// ±0 - y
	// x - ±Inf
	return .Neg()
}

// Mul sets z to the rounded product x*y and returns z.
// Precision, rounding, and accuracy reporting are as for [Float.Add].
// Mul panics with [ErrNaN] if one operand is zero and the other
// operand an infinity. The value of z is undefined in that case.
func ( *Float) (,  *Float) *Float {
	if debugFloat {
		.validate()
		.validate()
	}

	if .prec == 0 {
		.prec = umax32(.prec, .prec)
	}

	.neg = .neg != .neg

	if .form == finite && .form == finite {
		// x * y (common case)
		.umul(, )
		return 
	}

	.acc = Exact
	if .form == zero && .form == inf || .form == inf && .form == zero {
		// ±0 * ±Inf
		// ±Inf * ±0
		// value of z is undefined but make sure it's valid
		.form = zero
		.neg = false
		panic(ErrNaN{"multiplication of zero with infinity"})
	}

	if .form == inf || .form == inf {
		// ±Inf * y
		// x * ±Inf
		.form = inf
		return 
	}

	// ±0 * y
	// x * ±0
	.form = zero
	return 
}

// Quo sets z to the rounded quotient x/y and returns z.
// Precision, rounding, and accuracy reporting are as for [Float.Add].
// Quo panics with [ErrNaN] if both operands are zero or infinities.
// The value of z is undefined in that case.
func ( *Float) (,  *Float) *Float {
	if debugFloat {
		.validate()
		.validate()
	}

	if .prec == 0 {
		.prec = umax32(.prec, .prec)
	}

	.neg = .neg != .neg

	if .form == finite && .form == finite {
		// x / y (common case)
		.uquo(, )
		return 
	}

	.acc = Exact
	if .form == zero && .form == zero || .form == inf && .form == inf {
		// ±0 / ±0
		// ±Inf / ±Inf
		// value of z is undefined but make sure it's valid
		.form = zero
		.neg = false
		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
	}

	if .form == zero || .form == inf {
		// ±0 / y
		// x / ±Inf
		.form = zero
		return 
	}

	// x / ±0
	// ±Inf / y
	.form = inf
	return 
}

// Cmp compares x and y and returns:
//   - -1 if x < y;
//   - 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf);
//   - +1 if x > y.
func ( *Float) ( *Float) int {
	if debugFloat {
		.validate()
		.validate()
	}

	 := .ord()
	 := .ord()
	switch {
	case  < :
		return -1
	case  > :
		return +1
	}
	// mx == my

	// only if |mx| == 1 we have to compare the mantissae
	switch  {
	case -1:
		return .ucmp()
	case +1:
		return .ucmp()
	}

	return 0
}

// ord classifies x and returns:
//
//	-2 if -Inf == x
//	-1 if -Inf < x < 0
//	 0 if x == 0 (signed or unsigned)
//	+1 if 0 < x < +Inf
//	+2 if x == +Inf
func ( *Float) () int {
	var  int
	switch .form {
	case finite:
		 = 1
	case zero:
		return 0
	case inf:
		 = 2
	}
	if .neg {
		 = -
	}
	return 
}

func umax32(,  uint32) uint32 {
	if  >  {
		return 
	}
	return 
}