// Adapted from https://github.com/Alexhuszagh/rust-lexical. //! Defines rounding schemes for floating-point numbers. use super::float::ExtendedFloat; use super::num::*; use super::shift::*; use core::mem; // MASKS /// Calculate a scalar factor of 2 above the halfway point. #[inline] pub(crate) fn nth_bit(n: u64) -> u64 { let bits: u64 = mem::size_of::() as u64 * 8; debug_assert!(n < bits, "nth_bit() overflow in shl."); 1 << n } /// Generate a bitwise mask for the lower `n` bits. #[inline] pub(crate) fn lower_n_mask(n: u64) -> u64 { let bits: u64 = mem::size_of::() as u64 * 8; debug_assert!(n <= bits, "lower_n_mask() overflow in shl."); if n == bits { u64::max_value() } else { (1 << n) - 1 } } /// Calculate the halfway point for the lower `n` bits. #[inline] pub(crate) fn lower_n_halfway(n: u64) -> u64 { let bits: u64 = mem::size_of::() as u64 * 8; debug_assert!(n <= bits, "lower_n_halfway() overflow in shl."); if n == 0 { 0 } else { nth_bit(n - 1) } } /// Calculate a bitwise mask with `n` 1 bits starting at the `bit` position. #[inline] pub(crate) fn internal_n_mask(bit: u64, n: u64) -> u64 { let bits: u64 = mem::size_of::() as u64 * 8; debug_assert!(bit <= bits, "internal_n_halfway() overflow in shl."); debug_assert!(n <= bits, "internal_n_halfway() overflow in shl."); debug_assert!(bit >= n, "internal_n_halfway() overflow in sub."); lower_n_mask(bit) ^ lower_n_mask(bit - n) } // NEAREST ROUNDING // Shift right N-bytes and round to the nearest. // // Return if we are above halfway and if we are halfway. #[inline] pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32) -> (bool, bool) { // Extract the truncated bits using mask. // Calculate if the value of the truncated bits are either above // the mid-way point, or equal to it. // // For example, for 4 truncated bytes, the mask would be b1111 // and the midway point would be b1000. let mask: u64 = lower_n_mask(shift as u64); let halfway: u64 = lower_n_halfway(shift as u64); let truncated_bits = fp.mant & mask; let is_above = truncated_bits > halfway; let is_halfway = truncated_bits == halfway; // Bit shift so the leading bit is in the hidden bit. overflowing_shr(fp, shift); (is_above, is_halfway) } // Tie rounded floating point to event. #[inline] pub(crate) fn tie_even(fp: &mut ExtendedFloat, is_above: bool, is_halfway: bool) { // Extract the last bit after shifting (and determine if it is odd). let is_odd = fp.mant & 1 == 1; // Calculate if we need to roundup. // We need to roundup if we are above halfway, or if we are odd // and at half-way (need to tie-to-even). if is_above || (is_odd && is_halfway) { fp.mant += 1; } } // Shift right N-bytes and round nearest, tie-to-even. // // Floating-point arithmetic uses round to nearest, ties to even, // which rounds to the nearest value, if the value is halfway in between, // round to an even value. #[inline] pub(crate) fn round_nearest_tie_even(fp: &mut ExtendedFloat, shift: i32) { let (is_above, is_halfway) = round_nearest(fp, shift); tie_even(fp, is_above, is_halfway); } // DIRECTED ROUNDING // Shift right N-bytes and round towards a direction. // // Return if we have any truncated bytes. #[inline] fn round_toward(fp: &mut ExtendedFloat, shift: i32) -> bool { let mask: u64 = lower_n_mask(shift as u64); let truncated_bits = fp.mant & mask; // Bit shift so the leading bit is in the hidden bit. overflowing_shr(fp, shift); truncated_bits != 0 } // Round down. #[inline] fn downard(_: &mut ExtendedFloat, _: bool) {} // Shift right N-bytes and round toward zero. // // Floating-point arithmetic defines round toward zero, which rounds // towards positive zero. #[inline] pub(crate) fn round_downward(fp: &mut ExtendedFloat, shift: i32) { // Bit shift so the leading bit is in the hidden bit. // No rounding schemes, so we just ignore everything else. let is_truncated = round_toward(fp, shift); downard(fp, is_truncated); } // ROUND TO FLOAT // Shift the ExtendedFloat fraction to the fraction bits in a native float. // // Floating-point arithmetic uses round to nearest, ties to even, // which rounds to the nearest value, if the value is halfway in between, // round to an even value. #[inline] pub(crate) fn round_to_float(fp: &mut ExtendedFloat, algorithm: Algorithm) where F: Float, Algorithm: FnOnce(&mut ExtendedFloat, i32), { // Calculate the difference to allow a single calculation // rather than a loop, to minimize the number of ops required. // This does underflow detection. let final_exp = fp.exp + F::DEFAULT_SHIFT; if final_exp < F::DENORMAL_EXPONENT { // We would end up with a denormal exponent, try to round to more // digits. Only shift right if we can avoid zeroing out the value, // which requires the exponent diff to be < M::BITS. The value // is already normalized, so we shouldn't have any issue zeroing // out the value. let diff = F::DENORMAL_EXPONENT - fp.exp; if diff <= u64::FULL { // We can avoid underflow, can get a valid representation. algorithm(fp, diff); } else { // Certain underflow, assign literal 0s. fp.mant = 0; fp.exp = 0; } } else { algorithm(fp, F::DEFAULT_SHIFT); } if fp.mant & F::CARRY_MASK == F::CARRY_MASK { // Roundup carried over to 1 past the hidden bit. shr(fp, 1); } } // AVOID OVERFLOW/UNDERFLOW // Avoid overflow for large values, shift left as needed. // // Shift until a 1-bit is in the hidden bit, if the mantissa is not 0. #[inline] pub(crate) fn avoid_overflow(fp: &mut ExtendedFloat) where F: Float, { // Calculate the difference to allow a single calculation // rather than a loop, minimizing the number of ops required. if fp.exp >= F::MAX_EXPONENT { let diff = fp.exp - F::MAX_EXPONENT; if diff <= F::MANTISSA_SIZE { // Our overflow mask needs to start at the hidden bit, or at // `F::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set, // to see if our value overflows. let bit = (F::MANTISSA_SIZE + 1) as u64; let n = (diff + 1) as u64; let mask = internal_n_mask(bit, n); if (fp.mant & mask) == 0 { // If we have no 1-bit in the hidden-bit position, // which is index 0, we need to shift 1. let shift = diff + 1; shl(fp, shift); } } } } // ROUND TO NATIVE // Round an extended-precision float to a native float representation. #[inline] pub(crate) fn round_to_native(fp: &mut ExtendedFloat, algorithm: Algorithm) where F: Float, Algorithm: FnOnce(&mut ExtendedFloat, i32), { // Shift all the way left, to ensure a consistent representation. // The following right-shifts do not work for a non-normalized number. fp.normalize(); // Round so the fraction is in a native mantissa representation, // and avoid overflow/underflow. round_to_float::(fp, algorithm); avoid_overflow::(fp); }