// Adapted from https://github.com/Alexhuszagh/rust-lexical. //! Estimate the error in an 80-bit approximation of a float. //! //! This estimates the error in a floating-point representation. //! //! This implementation is loosely based off the Golang implementation, //! found here: //! https://golang.org/src/strconv/atof.go use super::float::*; use super::num::*; use super::rounding::*; pub(crate) trait FloatErrors { /// Get the full error scale. fn error_scale() -> u32; /// Get the half error scale. fn error_halfscale() -> u32; /// Determine if the number of errors is tolerable for float precision. fn error_is_accurate(count: u32, fp: &ExtendedFloat) -> bool; } /// Check if the error is accurate with a round-nearest rounding scheme. #[inline] fn nearest_error_is_accurate(errors: u64, fp: &ExtendedFloat, extrabits: u64) -> bool { // Round-to-nearest, need to use the halfway point. if extrabits == 65 { // Underflow, we have a shift larger than the mantissa. // Representation is valid **only** if the value is close enough // overflow to the next bit within errors. If it overflows, // the representation is **not** valid. !fp.mant.overflowing_add(errors).1 } else { let mask: u64 = lower_n_mask(extrabits); let extra: u64 = fp.mant & mask; // Round-to-nearest, need to check if we're close to halfway. // IE, b10100 | 100000, where `|` signifies the truncation point. let halfway: u64 = lower_n_halfway(extrabits); let cmp1 = halfway.wrapping_sub(errors) < extra; let cmp2 = extra < halfway.wrapping_add(errors); // If both comparisons are true, we have significant rounding error, // and the value cannot be exactly represented. Otherwise, the // representation is valid. !(cmp1 && cmp2) } } impl FloatErrors for u64 { #[inline] fn error_scale() -> u32 { 8 } #[inline] fn error_halfscale() -> u32 { u64::error_scale() / 2 } #[inline] fn error_is_accurate(count: u32, fp: &ExtendedFloat) -> bool { // Determine if extended-precision float is a good approximation. // If the error has affected too many units, the float will be // inaccurate, or if the representation is too close to halfway // that any operations could affect this halfway representation. // See the documentation for dtoa for more information. let bias = -(F::EXPONENT_BIAS - F::MANTISSA_SIZE); let denormal_exp = bias - 63; // This is always a valid u32, since (denormal_exp - fp.exp) // will always be positive and the significand size is {23, 52}. let extrabits = if fp.exp <= denormal_exp { 64 - F::MANTISSA_SIZE + denormal_exp - fp.exp } else { 63 - F::MANTISSA_SIZE }; // Our logic is as follows: we want to determine if the actual // mantissa and the errors during calculation differ significantly // from the rounding point. The rounding point for round-nearest // is the halfway point, IE, this when the truncated bits start // with b1000..., while the rounding point for the round-toward // is when the truncated bits are equal to 0. // To do so, we can check whether the rounding point +/- the error // are >/< the actual lower n bits. // // For whether we need to use signed or unsigned types for this // analysis, see this example, using u8 rather than u64 to simplify // things. // // # Comparisons // cmp1 = (halfway - errors) < extra // cmp1 = extra < (halfway + errors) // // # Large Extrabits, Low Errors // // extrabits = 8 // halfway = 0b10000000 // extra = 0b10000010 // errors = 0b00000100 // halfway - errors = 0b01111100 // halfway + errors = 0b10000100 // // Unsigned: // halfway - errors = 124 // halfway + errors = 132 // extra = 130 // cmp1 = true // cmp2 = true // Signed: // halfway - errors = 124 // halfway + errors = -124 // extra = -126 // cmp1 = false // cmp2 = true // // # Conclusion // // Since errors will always be small, and since we want to detect // if the representation is accurate, we need to use an **unsigned** // type for comparisons. let extrabits = extrabits as u64; let errors = count as u64; if extrabits > 65 { // Underflow, we have a literal 0. return true; } nearest_error_is_accurate(errors, fp, extrabits) } }