diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buffer/mod.rs | 28 | ||||
-rw-r--r-- | src/d2s.rs | 60 | ||||
-rw-r--r-- | src/d2s_intrinsics.rs | 82 | ||||
-rw-r--r-- | src/d2s_small_table.rs | 71 | ||||
-rw-r--r-- | src/lib.rs | 40 | ||||
-rw-r--r-- | src/pretty/exponent.rs | 6 | ||||
-rw-r--r-- | src/pretty/mantissa.rs | 48 | ||||
-rw-r--r-- | src/pretty/mod.rs | 6 | ||||
-rw-r--r-- | src/s2d.rs | 5 | ||||
-rw-r--r-- | src/s2f.rs | 26 |
10 files changed, 116 insertions, 256 deletions
diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index df21fe0..2ccd9b0 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -1,13 +1,12 @@ use crate::raw; -#[cfg(maybe_uninit)] use core::mem::MaybeUninit; -use core::{mem, slice, str}; +use core::{slice, str}; #[cfg(feature = "no-panic")] use no_panic::no_panic; -const NAN: &'static str = "NaN"; -const INFINITY: &'static str = "inf"; -const NEG_INFINITY: &'static str = "-inf"; +const NAN: &str = "NaN"; +const INFINITY: &str = "inf"; +const NEG_INFINITY: &str = "-inf"; /// Safe API for formatting floating point numbers to text. /// @@ -19,10 +18,7 @@ const NEG_INFINITY: &'static str = "-inf"; /// assert_eq!(printed, "1.234"); /// ``` pub struct Buffer { - #[cfg(maybe_uninit)] bytes: [MaybeUninit<u8>; 24], - #[cfg(not(maybe_uninit))] - bytes: [u8; 24], } impl Buffer { @@ -31,14 +27,8 @@ impl Buffer { #[inline] #[cfg_attr(feature = "no-panic", no_panic)] pub fn new() -> Self { - // assume_init is safe here, since this is an array of MaybeUninit, which does not need - // to be initialized. - #[cfg(maybe_uninit)] let bytes = [MaybeUninit::<u8>::uninit(); 24]; - #[cfg(not(maybe_uninit))] - let bytes = unsafe { mem::uninitialized() }; - - Buffer { bytes: bytes } + Buffer { bytes } } /// Print a floating point number into this buffer and return a reference to @@ -125,7 +115,7 @@ impl Sealed for f32 { #[inline] fn is_nonfinite(self) -> bool { const EXP_MASK: u32 = 0x7f800000; - let bits = unsafe { mem::transmute::<f32, u32>(self) }; + let bits = self.to_bits(); bits & EXP_MASK == EXP_MASK } @@ -134,7 +124,7 @@ impl Sealed for f32 { fn format_nonfinite(self) -> &'static str { const MANTISSA_MASK: u32 = 0x007fffff; const SIGN_MASK: u32 = 0x80000000; - let bits = unsafe { mem::transmute::<f32, u32>(self) }; + let bits = self.to_bits(); if bits & MANTISSA_MASK != 0 { NAN } else if bits & SIGN_MASK != 0 { @@ -154,7 +144,7 @@ impl Sealed for f64 { #[inline] fn is_nonfinite(self) -> bool { const EXP_MASK: u64 = 0x7ff0000000000000; - let bits = unsafe { mem::transmute::<f64, u64>(self) }; + let bits = self.to_bits(); bits & EXP_MASK == EXP_MASK } @@ -163,7 +153,7 @@ impl Sealed for f64 { fn format_nonfinite(self) -> &'static str { const MANTISSA_MASK: u64 = 0x000fffffffffffff; const SIGN_MASK: u64 = 0x8000000000000000; - let bits = unsafe { mem::transmute::<f64, u64>(self) }; + let bits = self.to_bits(); if bits & MANTISSA_MASK != 0 { NAN } else if bits & SIGN_MASK != 0 { @@ -24,9 +24,6 @@ pub use crate::d2s_full_table::*; use crate::d2s_intrinsics::*; #[cfg(feature = "small")] pub use crate::d2s_small_table::*; -#[cfg(not(maybe_uninit))] -use core::mem; -#[cfg(maybe_uninit)] use core::mem::MaybeUninit; pub const DOUBLE_MANTISSA_BITS: u32 = 52; @@ -117,14 +114,7 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 { let mut vr: u64; let mut vp: u64; let mut vm: u64; - #[cfg(not(maybe_uninit))] - { - vp = unsafe { mem::uninitialized() }; - vm = unsafe { mem::uninitialized() }; - } - #[cfg(maybe_uninit)] let mut vp_uninit: MaybeUninit<u64> = MaybeUninit::uninit(); - #[cfg(maybe_uninit)] let mut vm_uninit: MaybeUninit<u64> = MaybeUninit::uninit(); let e10: i32; let mut vm_is_trailing_zeros = false; @@ -147,30 +137,13 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 { DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize) }, i as u32, - #[cfg(maybe_uninit)] - { - vp_uninit.as_mut_ptr() - }, - #[cfg(not(maybe_uninit))] - { - &mut vp - }, - #[cfg(maybe_uninit)] - { - vm_uninit.as_mut_ptr() - }, - #[cfg(not(maybe_uninit))] - { - &mut vm - }, + vp_uninit.as_mut_ptr(), + vm_uninit.as_mut_ptr(), mm_shift, ) }; - #[cfg(maybe_uninit)] - { - vp = unsafe { vp_uninit.assume_init() }; - vm = unsafe { vm_uninit.assume_init() }; - } + vp = unsafe { vp_uninit.assume_init() }; + vm = unsafe { vm_uninit.assume_init() }; if q <= 21 { // This should use q <= 22, but I think 21 is also safe. Smaller values // may still be safe, but it's more difficult to reason about them. @@ -206,30 +179,13 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 { DOUBLE_POW5_SPLIT.get_unchecked(i as usize) }, j as u32, - #[cfg(maybe_uninit)] - { - vp_uninit.as_mut_ptr() - }, - #[cfg(not(maybe_uninit))] - { - &mut vp - }, - #[cfg(maybe_uninit)] - { - vm_uninit.as_mut_ptr() - }, - #[cfg(not(maybe_uninit))] - { - &mut vm - }, + vp_uninit.as_mut_ptr(), + vm_uninit.as_mut_ptr(), mm_shift, ) }; - #[cfg(maybe_uninit)] - { - vp = unsafe { vp_uninit.assume_init() }; - vm = unsafe { vm_uninit.assume_init() }; - } + vp = unsafe { vp_uninit.assume_init() }; + vm = unsafe { vm_uninit.assume_init() }; if q <= 1 { // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. // mv = 4 * m2, so it always has at least two trailing 0 bits. diff --git a/src/d2s_intrinsics.rs b/src/d2s_intrinsics.rs index 918ccab..f244a4d 100644 --- a/src/d2s_intrinsics.rs +++ b/src/d2s_intrinsics.rs @@ -20,46 +20,6 @@ use core::ptr; -// Returns (lo, hi). -#[cfg(not(integer128))] -#[cfg_attr(feature = "no-panic", inline)] -pub fn umul128(a: u64, b: u64) -> (u64, u64) { - let a_lo = a as u32; - let a_hi = (a >> 32) as u32; - let b_lo = b as u32; - let b_hi = (b >> 32) as u32; - - let b00 = a_lo as u64 * b_lo as u64; - let b01 = a_lo as u64 * b_hi as u64; - let b10 = a_hi as u64 * b_lo as u64; - let b11 = a_hi as u64 * b_hi as u64; - - let b00_lo = b00 as u32; - let b00_hi = (b00 >> 32) as u32; - - let mid1 = b10 + b00_hi as u64; - let mid1_lo = mid1 as u32; - let mid1_hi = (mid1 >> 32) as u32; - - let mid2 = b01 + mid1_lo as u64; - let mid2_lo = mid2 as u32; - let mid2_hi = (mid2 >> 32) as u32; - - let p_hi = b11 + mid1_hi as u64 + mid2_hi as u64; - let p_lo = ((mid2_lo as u64) << 32) | b00_lo as u64; - - (p_lo, p_hi) -} - -#[cfg(not(integer128))] -#[cfg_attr(feature = "no-panic", inline)] -pub fn shiftright128(lo: u64, hi: u64, dist: u32) -> u64 { - // We don't need to handle the case dist >= 64 here (see above). - debug_assert!(dist > 0); - debug_assert!(dist < 64); - (hi << (64 - dist)) | (lo >> dist) -} - #[cfg_attr(feature = "no-panic", inline)] pub fn div5(x: u64) -> u64 { x / 5 @@ -107,7 +67,6 @@ pub fn multiple_of_power_of_2(value: u64, p: u32) -> bool { (value & ((1u64 << p) - 1)) == 0 } -#[cfg(integer128)] #[cfg_attr(feature = "no-panic", inline)] pub fn mul_shift_64(m: u64, mul: &(u64, u64), j: u32) -> u64 { let b0 = m as u128 * mul.0 as u128; @@ -115,7 +74,6 @@ pub fn mul_shift_64(m: u64, mul: &(u64, u64), j: u32) -> u64 { (((b0 >> 64) + b2) >> (j - 64)) as u64 } -#[cfg(integer128)] #[cfg_attr(feature = "no-panic", inline)] pub unsafe fn mul_shift_all_64( m: u64, @@ -129,43 +87,3 @@ pub unsafe fn mul_shift_all_64( ptr::write(vm, mul_shift_64(4 * m - 1 - mm_shift as u64, mul, j)); mul_shift_64(4 * m, mul, j) } - -#[cfg(not(integer128))] -#[cfg_attr(feature = "no-panic", inline)] -pub unsafe fn mul_shift_all_64( - mut m: u64, - mul: &(u64, u64), - j: u32, - vp: *mut u64, - vm: *mut u64, - mm_shift: u32, -) -> u64 { - m <<= 1; - // m is maximum 55 bits - let (lo, tmp) = umul128(m, mul.0); - let (mut mid, mut hi) = umul128(m, mul.1); - mid = mid.wrapping_add(tmp); - hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi - - let lo2 = lo.wrapping_add(mul.0); - let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64); - let hi2 = hi.wrapping_add((mid2 < mid) as u64); - ptr::write(vp, shiftright128(mid2, hi2, j - 64 - 1)); - - if mm_shift == 1 { - let lo3 = lo.wrapping_sub(mul.0); - let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64); - let hi3 = hi.wrapping_sub((mid3 > mid) as u64); - ptr::write(vm, shiftright128(mid3, hi3, j - 64 - 1)); - } else { - let lo3 = lo + lo; - let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64); - let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64); - let lo4 = lo3.wrapping_sub(mul.0); - let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64); - let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64); - ptr::write(vm, shiftright128(mid4, hi4, j - 64)); - } - - shiftright128(mid, hi, j - 64 - 1) -} diff --git a/src/d2s_small_table.rs b/src/d2s_small_table.rs index 08519a2..262fc04 100644 --- a/src/d2s_small_table.rs +++ b/src/d2s_small_table.rs @@ -19,10 +19,8 @@ // KIND, either express or implied. use crate::common::*; -#[cfg(not(integer128))] -use crate::d2s_intrinsics::*; -pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 13] = [ +pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 15] = [ (1, 2305843009213693952), (5955668970331000884, 1784059615882449851), (8982663654677661702, 1380349269358112757), @@ -36,6 +34,8 @@ pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 13] = [ (12533209867169019542, 1418129833677084982), (5577825024675947042, 2194449627517475473), (11006974540203867551, 1697873161311732311), + (10313493231639821582, 1313665730009899186), + (12701016819766672773, 2032799256770390445), ]; pub static POW5_INV_OFFSETS: [u32; 19] = [ @@ -96,7 +96,6 @@ pub static DOUBLE_POW5_TABLE: [u64; 26] = [ ]; // Computes 5^i in the form required by Ryū. -#[cfg(integer128)] #[cfg_attr(feature = "no-panic", inline)] pub unsafe fn compute_pow5(i: u32) -> (u64, u64) { let base = i / DOUBLE_POW5_TABLE.len() as u32; @@ -112,7 +111,7 @@ pub unsafe fn compute_pow5(i: u32) -> (u64, u64) { let b0 = m as u128 * mul.0 as u128; let b2 = m as u128 * mul.1 as u128; let delta = pow5bits(i as i32) - pow5bits(base2 as i32); - debug_assert!(base < POW5_OFFSETS.len() as u32); + debug_assert!(i / 16 < POW5_OFFSETS.len() as u32); let shifted_sum = (b0 >> delta) + (b2 << (64 - delta)) + ((*POW5_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u128; @@ -120,7 +119,6 @@ pub unsafe fn compute_pow5(i: u32) -> (u64, u64) { } // Computes 5^-i in the form required by Ryū. -#[cfg(integer128)] #[cfg_attr(feature = "no-panic", inline)] pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) { let base = (i + DOUBLE_POW5_TABLE.len() as u32 - 1) / DOUBLE_POW5_TABLE.len() as u32; @@ -142,64 +140,3 @@ pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) { + ((*POW5_INV_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u128; (shifted_sum as u64, (shifted_sum >> 64) as u64) } - -// Computes 5^i in the form required by Ryū, and stores it in the given pointer. -#[cfg(not(integer128))] -#[cfg_attr(feature = "no-panic", inline)] -pub unsafe fn compute_pow5(i: u32) -> (u64, u64) { - let base = i / DOUBLE_POW5_TABLE.len() as u32; - let base2 = base * DOUBLE_POW5_TABLE.len() as u32; - let offset = i - base2; - debug_assert!(base < DOUBLE_POW5_SPLIT2.len() as u32); - let mul = *DOUBLE_POW5_SPLIT2.get_unchecked(base as usize); - if offset == 0 { - return mul; - } - debug_assert!(offset < DOUBLE_POW5_TABLE.len() as u32); - let m = *DOUBLE_POW5_TABLE.get_unchecked(offset as usize); - let (low1, mut high1) = umul128(m, mul.1); - let (low0, high0) = umul128(m, mul.0); - let sum = high0 + low1; - if sum < high0 { - high1 += 1; // overflow into high1 - } - // high1 | sum | low0 - let delta = pow5bits(i as i32) - pow5bits(base2 as i32); - debug_assert!(base < POW5_OFFSETS.len() as u32); - ( - shiftright128(low0, sum, delta as u32) - + ((*POW5_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u64, - shiftright128(sum, high1, delta as u32), - ) -} - -// Computes 5^-i in the form required by Ryū, and stores it in the given pointer. -#[cfg(not(integer128))] -#[cfg_attr(feature = "no-panic", inline)] -pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) { - let base = (i + DOUBLE_POW5_TABLE.len() as u32 - 1) / DOUBLE_POW5_TABLE.len() as u32; - let base2 = base * DOUBLE_POW5_TABLE.len() as u32; - let offset = base2 - i; - debug_assert!(base < DOUBLE_POW5_INV_SPLIT2.len() as u32); - let mul = *DOUBLE_POW5_INV_SPLIT2.get_unchecked(base as usize); // 1/5^base2 - if offset == 0 { - return mul; - } - debug_assert!(offset < DOUBLE_POW5_TABLE.len() as u32); - let m = *DOUBLE_POW5_TABLE.get_unchecked(offset as usize); - let (low1, mut high1) = umul128(m, mul.1); - let (low0, high0) = umul128(m, mul.0 - 1); - let sum = high0 + low1; - if sum < high0 { - high1 += 1; // overflow into high1 - } - // high1 | sum | low0 - let delta = pow5bits(base2 as i32) - pow5bits(i as i32); - debug_assert!(base < POW5_INV_OFFSETS.len() as u32); - ( - shiftright128(low0, sum, delta as u32) - + 1 - + ((*POW5_INV_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u64, - shiftright128(sum, high1, delta as u32), - ) -} @@ -29,7 +29,9 @@ //! } //! ``` //! -//! ## Performance +//! ## Performance (lower is better) +//! +//! ![performance](https://raw.githubusercontent.com/dtolnay/ryu/master/performance.png) //! //! You can run upstream's benchmarks with: //! @@ -62,20 +64,10 @@ //! $ cargo bench //! ``` //! -//! The benchmark shows Ryū approximately 4-10x faster than the standard library +//! The benchmark shows Ryū approximately 2-5x faster than the standard library //! across a range of f32 and f64 inputs. Measurements are in nanoseconds per //! iteration; smaller is better. //! -//! | type=f32 | 0.0 | 0.1234 | 2.718281828459045 | f32::MAX | -//! |:--------:|:----:|:------:|:-----------------:|:--------:| -//! | RYU | 3ns | 28ns | 23ns | 22ns | -//! | STD | 40ns | 106ns | 128ns | 110ns | -//! -//! | type=f64 | 0.0 | 0.1234 | 2.718281828459045 | f64::MAX | -//! |:--------:|:----:|:------:|:-----------------:|:--------:| -//! | RYU | 3ns | 50ns | 35ns | 32ns | -//! | STD | 39ns | 105ns | 128ns | 202ns | -//! //! ## Formatting //! //! This library tends to produce more human-readable output than the standard @@ -89,11 +81,25 @@ //! notation. #![no_std] -#![doc(html_root_url = "https://docs.rs/ryu/1.0.5")] -#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] -#![cfg_attr( - feature = "cargo-clippy", - allow(cast_lossless, many_single_char_names, unreadable_literal,) +#![doc(html_root_url = "https://docs.rs/ryu/1.0.9")] +#![allow( + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::checked_conversions, + clippy::doc_markdown, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::many_single_char_names, + clippy::missing_panics_doc, + clippy::module_name_repetitions, + clippy::must_use_candidate, + clippy::similar_names, + clippy::too_many_lines, + clippy::unreadable_literal, + clippy::unseparated_literal_suffix, + clippy::wildcard_imports )] mod buffer; diff --git a/src/pretty/exponent.rs b/src/pretty/exponent.rs index 84053d5..b72add5 100644 --- a/src/pretty/exponent.rs +++ b/src/pretty/exponent.rs @@ -14,11 +14,11 @@ pub unsafe fn write_exponent3(mut k: isize, mut result: *mut u8) -> usize { if k >= 100 { *result = b'0' + (k / 100) as u8; k %= 100; - let d = DIGIT_TABLE.get_unchecked(k as usize * 2); + let d = DIGIT_TABLE.as_ptr().offset(k * 2); ptr::copy_nonoverlapping(d, result.offset(1), 2); sign as usize + 3 } else if k >= 10 { - let d = DIGIT_TABLE.get_unchecked(k as usize * 2); + let d = DIGIT_TABLE.as_ptr().offset(k * 2); ptr::copy_nonoverlapping(d, result, 2); sign as usize + 2 } else { @@ -38,7 +38,7 @@ pub unsafe fn write_exponent2(mut k: isize, mut result: *mut u8) -> usize { debug_assert!(k < 100); if k >= 10 { - let d = DIGIT_TABLE.get_unchecked(k as usize * 2); + let d = DIGIT_TABLE.as_ptr().offset(k * 2); ptr::copy_nonoverlapping(d, result, 2); sign as usize + 2 } else { diff --git a/src/pretty/mantissa.rs b/src/pretty/mantissa.rs index e5fc202..150c79c 100644 --- a/src/pretty/mantissa.rs +++ b/src/pretty/mantissa.rs @@ -15,10 +15,26 @@ pub unsafe fn write_mantissa_long(mut output: u64, mut result: *mut u8) { let c1 = (c / 100) << 1; let d0 = (d % 100) << 1; let d1 = (d / 100) << 1; - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize), result.offset(-2), 2); - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize), result.offset(-4), 2); - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(d0 as usize), result.offset(-6), 2); - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(d1 as usize), result.offset(-8), 2); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(c0 as isize), + result.offset(-2), + 2, + ); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(c1 as isize), + result.offset(-4), + 2, + ); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(d0 as isize), + result.offset(-6), + 2, + ); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(d1 as isize), + result.offset(-8), + 2, + ); result = result.offset(-8); } write_mantissa(output as u32, result); @@ -31,19 +47,35 @@ pub unsafe fn write_mantissa(mut output: u32, mut result: *mut u8) { output /= 10_000; let c0 = (c % 100) << 1; let c1 = (c / 100) << 1; - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize), result.offset(-2), 2); - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize), result.offset(-4), 2); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(c0 as isize), + result.offset(-2), + 2, + ); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(c1 as isize), + result.offset(-4), + 2, + ); result = result.offset(-4); } if output >= 100 { let c = ((output % 100) << 1) as u32; output /= 100; - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize), result.offset(-2), 2); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(c as isize), + result.offset(-2), + 2, + ); result = result.offset(-2); } if output >= 10 { let c = (output << 1) as u32; - ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize), result.offset(-2), 2); + ptr::copy_nonoverlapping( + DIGIT_TABLE.as_ptr().offset(c as isize), + result.offset(-2), + 2, + ); } else { *result.offset(-1) = b'0' + output as u8; } diff --git a/src/pretty/mod.rs b/src/pretty/mod.rs index a82692d..b196a11 100644 --- a/src/pretty/mod.rs +++ b/src/pretty/mod.rs @@ -6,7 +6,7 @@ use self::mantissa::*; use crate::common; use crate::d2s::{self, *}; use crate::f2s::*; -use core::{mem, ptr}; +use core::ptr; #[cfg(feature = "no-panic")] use no_panic::no_panic; @@ -50,7 +50,7 @@ use no_panic::no_panic; #[must_use] #[cfg_attr(feature = "no-panic", no_panic)] pub unsafe fn format64(f: f64, result: *mut u8) -> usize { - let bits = mem::transmute::<f64, u64>(f); + let bits = f.to_bits(); let sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1); let ieee_exponent = @@ -157,7 +157,7 @@ pub unsafe fn format64(f: f64, result: *mut u8) -> usize { #[must_use] #[cfg_attr(feature = "no-panic", no_panic)] pub unsafe fn format32(f: f32, result: *mut u8) -> usize { - let bits = mem::transmute::<f32, u32>(f); + let bits = f.to_bits(); let sign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0; let ieee_mantissa = bits & ((1u32 << FLOAT_MANTISSA_BITS) - 1); let ieee_exponent = @@ -203,12 +203,13 @@ pub fn s2d(buffer: &[u8]) -> Result<f64, Error> { let round_up = last_removed_bit != 0 && (!trailing_zeros || ((m2 >> shift) & 1) != 0); let mut ieee_m2 = (m2 >> shift).wrapping_add(round_up as u64); - if ieee_m2 == (1_u64 << (d2s::DOUBLE_MANTISSA_BITS + 1)) { + debug_assert!(ieee_m2 <= 1_u64 << (d2s::DOUBLE_MANTISSA_BITS + 1)); + ieee_m2 &= (1_u64 << d2s::DOUBLE_MANTISSA_BITS) - 1; + if ieee_m2 == 0 && round_up { // Due to how the IEEE represents +/-Infinity, we don't need to check // for overflow here. ieee_e2 += 1; } - ieee_m2 &= (1_u64 << d2s::DOUBLE_MANTISSA_BITS) - 1; let ieee = ((((signed_m as u64) << d2s::DOUBLE_EXPONENT_BITS) | ieee_e2 as u64) << d2s::DOUBLE_MANTISSA_BITS) | ieee_m2; @@ -153,13 +153,29 @@ pub fn s2f(buffer: &[u8]) -> Result<f32, Error> { .wrapping_add(e10 as u32) .wrapping_sub(ceil_log2_pow5(-e10) as u32) .wrapping_sub(f2s::FLOAT_MANTISSA_BITS + 1) as i32; + + // We now compute [m10 * 10^e10 / 2^e2] = [m10 / (5^(-e10) 2^(e2-e10))]. let j = e2 .wrapping_sub(e10) .wrapping_add(ceil_log2_pow5(-e10)) .wrapping_sub(1) .wrapping_add(f2s::FLOAT_POW5_INV_BITCOUNT); m2 = mul_pow5_inv_div_pow2(m10, -e10 as u32, j); - trailing_zeros = multiple_of_power_of_5_32(m10, -e10 as u32); + + // We also compute if the result is exact, i.e., + // [m10 / (5^(-e10) 2^(e2-e10))] == m10 / (5^(-e10) 2^(e2-e10)) + // + // If e2-e10 >= 0, we need to check whether (5^(-e10) 2^(e2-e10)) + // divides m10, which is the case iff pow5(m10) >= -e10 AND pow2(m10) >= + // e2-e10. + // + // If e2-e10 < 0, we have actually computed [m10 * 2^(e10 e2) / + // 5^(-e10)] above, and we need to check whether 5^(-e10) divides (m10 * + // 2^(e10-e2)), which is the case iff pow5(m10 * 2^(e10-e2)) = pow5(m10) + // >= -e10. + trailing_zeros = (e2 < e10 + || (e2 - e10 < 32 && multiple_of_power_of_2_32(m10, (e2 - e10) as u32))) + && multiple_of_power_of_5_32(m10, -e10 as u32); } // Compute the final IEEE exponent. @@ -194,12 +210,16 @@ pub fn s2f(buffer: &[u8]) -> Result<f32, Error> { let round_up = last_removed_bit != 0 && (!trailing_zeros || ((m2 >> shift) & 1) != 0); let mut ieee_m2 = (m2 >> shift).wrapping_add(round_up as u32); - if ieee_m2 == (1_u32 << (f2s::FLOAT_MANTISSA_BITS + 1)) { + debug_assert!(ieee_m2 <= 1_u32 << (f2s::FLOAT_MANTISSA_BITS + 1)); + ieee_m2 &= (1_u32 << f2s::FLOAT_MANTISSA_BITS) - 1; + if ieee_m2 == 0 && round_up { + // Rounding up may overflow the mantissa. + // In this case we move a trailing zero of the mantissa into the + // exponent. // Due to how the IEEE represents +/-Infinity, we don't need to check // for overflow here. ieee_e2 += 1; } - ieee_m2 &= (1_u32 << f2s::FLOAT_MANTISSA_BITS) - 1; let ieee = ((((signed_m as u32) << f2s::FLOAT_EXPONENT_BITS) | ieee_e2 as u32) << f2s::FLOAT_MANTISSA_BITS) | ieee_m2; |