aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/buffer/mod.rs 28
-rw-r--r-- src/d2s.rs 60
-rw-r--r-- src/d2s_intrinsics.rs 82
-rw-r--r-- src/d2s_small_table.rs 71
-rw-r--r-- src/lib.rs 40
-rw-r--r-- src/pretty/exponent.rs 6
-rw-r--r-- src/pretty/mantissa.rs 48
-rw-r--r-- src/pretty/mod.rs 6
-rw-r--r-- src/s2d.rs 5
-rw-r--r-- src/s2f.rs 26
10 files changed, 116 insertions, 256 deletions
diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs
index df21fe0..2ccd9b0 100644
--- a/src/buffer/mod.rs
+++ b/src/buffer/mod.rs
@@ -1,13 +1,12 @@
use crate::raw;
-#[cfg(maybe_uninit)]
use core::mem::MaybeUninit;
-use core::{mem, slice, str};
+use core::{slice, str};
#[cfg(feature = "no-panic")]
use no_panic::no_panic;
-const NAN: &'static str = "NaN";
-const INFINITY: &'static str = "inf";
-const NEG_INFINITY: &'static str = "-inf";
+const NAN: &str = "NaN";
+const INFINITY: &str = "inf";
+const NEG_INFINITY: &str = "-inf";
/// Safe API for formatting floating point numbers to text.
///
@@ -19,10 +18,7 @@ const NEG_INFINITY: &'static str = "-inf";
/// assert_eq!(printed, "1.234");
/// ```
pub struct Buffer {
- #[cfg(maybe_uninit)]
bytes: [MaybeUninit<u8>; 24],
- #[cfg(not(maybe_uninit))]
- bytes: [u8; 24],
}
impl Buffer {
@@ -31,14 +27,8 @@ impl Buffer {
#[inline]
#[cfg_attr(feature = "no-panic", no_panic)]
pub fn new() -> Self {
- // assume_init is safe here, since this is an array of MaybeUninit, which does not need
- // to be initialized.
- #[cfg(maybe_uninit)]
let bytes = [MaybeUninit::<u8>::uninit(); 24];
- #[cfg(not(maybe_uninit))]
- let bytes = unsafe { mem::uninitialized() };
-
- Buffer { bytes: bytes }
+ Buffer { bytes }
}
/// Print a floating point number into this buffer and return a reference to
@@ -125,7 +115,7 @@ impl Sealed for f32 {
#[inline]
fn is_nonfinite(self) -> bool {
const EXP_MASK: u32 = 0x7f800000;
- let bits = unsafe { mem::transmute::<f32, u32>(self) };
+ let bits = self.to_bits();
bits & EXP_MASK == EXP_MASK
}
@@ -134,7 +124,7 @@ impl Sealed for f32 {
fn format_nonfinite(self) -> &'static str {
const MANTISSA_MASK: u32 = 0x007fffff;
const SIGN_MASK: u32 = 0x80000000;
- let bits = unsafe { mem::transmute::<f32, u32>(self) };
+ let bits = self.to_bits();
if bits & MANTISSA_MASK != 0 {
NAN
} else if bits & SIGN_MASK != 0 {
@@ -154,7 +144,7 @@ impl Sealed for f64 {
#[inline]
fn is_nonfinite(self) -> bool {
const EXP_MASK: u64 = 0x7ff0000000000000;
- let bits = unsafe { mem::transmute::<f64, u64>(self) };
+ let bits = self.to_bits();
bits & EXP_MASK == EXP_MASK
}
@@ -163,7 +153,7 @@ impl Sealed for f64 {
fn format_nonfinite(self) -> &'static str {
const MANTISSA_MASK: u64 = 0x000fffffffffffff;
const SIGN_MASK: u64 = 0x8000000000000000;
- let bits = unsafe { mem::transmute::<f64, u64>(self) };
+ let bits = self.to_bits();
if bits & MANTISSA_MASK != 0 {
NAN
} else if bits & SIGN_MASK != 0 {
diff --git a/src/d2s.rs b/src/d2s.rs
index 862fd5f..392577a 100644
--- a/src/d2s.rs
+++ b/src/d2s.rs
@@ -24,9 +24,6 @@ pub use crate::d2s_full_table::*;
use crate::d2s_intrinsics::*;
#[cfg(feature = "small")]
pub use crate::d2s_small_table::*;
-#[cfg(not(maybe_uninit))]
-use core::mem;
-#[cfg(maybe_uninit)]
use core::mem::MaybeUninit;
pub const DOUBLE_MANTISSA_BITS: u32 = 52;
@@ -117,14 +114,7 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
let mut vr: u64;
let mut vp: u64;
let mut vm: u64;
- #[cfg(not(maybe_uninit))]
- {
- vp = unsafe { mem::uninitialized() };
- vm = unsafe { mem::uninitialized() };
- }
- #[cfg(maybe_uninit)]
let mut vp_uninit: MaybeUninit<u64> = MaybeUninit::uninit();
- #[cfg(maybe_uninit)]
let mut vm_uninit: MaybeUninit<u64> = MaybeUninit::uninit();
let e10: i32;
let mut vm_is_trailing_zeros = false;
@@ -147,30 +137,13 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize)
},
i as u32,
- #[cfg(maybe_uninit)]
- {
- vp_uninit.as_mut_ptr()
- },
- #[cfg(not(maybe_uninit))]
- {
- &mut vp
- },
- #[cfg(maybe_uninit)]
- {
- vm_uninit.as_mut_ptr()
- },
- #[cfg(not(maybe_uninit))]
- {
- &mut vm
- },
+ vp_uninit.as_mut_ptr(),
+ vm_uninit.as_mut_ptr(),
mm_shift,
)
};
- #[cfg(maybe_uninit)]
- {
- vp = unsafe { vp_uninit.assume_init() };
- vm = unsafe { vm_uninit.assume_init() };
- }
+ vp = unsafe { vp_uninit.assume_init() };
+ vm = unsafe { vm_uninit.assume_init() };
if q <= 21 {
// This should use q <= 22, but I think 21 is also safe. Smaller values
// may still be safe, but it's more difficult to reason about them.
@@ -206,30 +179,13 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
DOUBLE_POW5_SPLIT.get_unchecked(i as usize)
},
j as u32,
- #[cfg(maybe_uninit)]
- {
- vp_uninit.as_mut_ptr()
- },
- #[cfg(not(maybe_uninit))]
- {
- &mut vp
- },
- #[cfg(maybe_uninit)]
- {
- vm_uninit.as_mut_ptr()
- },
- #[cfg(not(maybe_uninit))]
- {
- &mut vm
- },
+ vp_uninit.as_mut_ptr(),
+ vm_uninit.as_mut_ptr(),
mm_shift,
)
};
- #[cfg(maybe_uninit)]
- {
- vp = unsafe { vp_uninit.assume_init() };
- vm = unsafe { vm_uninit.assume_init() };
- }
+ vp = unsafe { vp_uninit.assume_init() };
+ vm = unsafe { vm_uninit.assume_init() };
if q <= 1 {
// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
// mv = 4 * m2, so it always has at least two trailing 0 bits.
diff --git a/src/d2s_intrinsics.rs b/src/d2s_intrinsics.rs
index 918ccab..f244a4d 100644
--- a/src/d2s_intrinsics.rs
+++ b/src/d2s_intrinsics.rs
@@ -20,46 +20,6 @@
use core::ptr;
-// Returns (lo, hi).
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub fn umul128(a: u64, b: u64) -> (u64, u64) {
- let a_lo = a as u32;
- let a_hi = (a >> 32) as u32;
- let b_lo = b as u32;
- let b_hi = (b >> 32) as u32;
-
- let b00 = a_lo as u64 * b_lo as u64;
- let b01 = a_lo as u64 * b_hi as u64;
- let b10 = a_hi as u64 * b_lo as u64;
- let b11 = a_hi as u64 * b_hi as u64;
-
- let b00_lo = b00 as u32;
- let b00_hi = (b00 >> 32) as u32;
-
- let mid1 = b10 + b00_hi as u64;
- let mid1_lo = mid1 as u32;
- let mid1_hi = (mid1 >> 32) as u32;
-
- let mid2 = b01 + mid1_lo as u64;
- let mid2_lo = mid2 as u32;
- let mid2_hi = (mid2 >> 32) as u32;
-
- let p_hi = b11 + mid1_hi as u64 + mid2_hi as u64;
- let p_lo = ((mid2_lo as u64) << 32) | b00_lo as u64;
-
- (p_lo, p_hi)
-}
-
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub fn shiftright128(lo: u64, hi: u64, dist: u32) -> u64 {
- // We don't need to handle the case dist >= 64 here (see above).
- debug_assert!(dist > 0);
- debug_assert!(dist < 64);
- (hi << (64 - dist)) | (lo >> dist)
-}
-
#[cfg_attr(feature = "no-panic", inline)]
pub fn div5(x: u64) -> u64 {
x / 5
@@ -107,7 +67,6 @@ pub fn multiple_of_power_of_2(value: u64, p: u32) -> bool {
(value & ((1u64 << p) - 1)) == 0
}
-#[cfg(integer128)]
#[cfg_attr(feature = "no-panic", inline)]
pub fn mul_shift_64(m: u64, mul: &(u64, u64), j: u32) -> u64 {
let b0 = m as u128 * mul.0 as u128;
@@ -115,7 +74,6 @@ pub fn mul_shift_64(m: u64, mul: &(u64, u64), j: u32) -> u64 {
(((b0 >> 64) + b2) >> (j - 64)) as u64
}
-#[cfg(integer128)]
#[cfg_attr(feature = "no-panic", inline)]
pub unsafe fn mul_shift_all_64(
m: u64,
@@ -129,43 +87,3 @@ pub unsafe fn mul_shift_all_64(
ptr::write(vm, mul_shift_64(4 * m - 1 - mm_shift as u64, mul, j));
mul_shift_64(4 * m, mul, j)
}
-
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub unsafe fn mul_shift_all_64(
- mut m: u64,
- mul: &(u64, u64),
- j: u32,
- vp: *mut u64,
- vm: *mut u64,
- mm_shift: u32,
-) -> u64 {
- m <<= 1;
- // m is maximum 55 bits
- let (lo, tmp) = umul128(m, mul.0);
- let (mut mid, mut hi) = umul128(m, mul.1);
- mid = mid.wrapping_add(tmp);
- hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi
-
- let lo2 = lo.wrapping_add(mul.0);
- let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64);
- let hi2 = hi.wrapping_add((mid2 < mid) as u64);
- ptr::write(vp, shiftright128(mid2, hi2, j - 64 - 1));
-
- if mm_shift == 1 {
- let lo3 = lo.wrapping_sub(mul.0);
- let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64);
- let hi3 = hi.wrapping_sub((mid3 > mid) as u64);
- ptr::write(vm, shiftright128(mid3, hi3, j - 64 - 1));
- } else {
- let lo3 = lo + lo;
- let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64);
- let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64);
- let lo4 = lo3.wrapping_sub(mul.0);
- let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64);
- let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64);
- ptr::write(vm, shiftright128(mid4, hi4, j - 64));
- }
-
- shiftright128(mid, hi, j - 64 - 1)
-}
diff --git a/src/d2s_small_table.rs b/src/d2s_small_table.rs
index 08519a2..262fc04 100644
--- a/src/d2s_small_table.rs
+++ b/src/d2s_small_table.rs
@@ -19,10 +19,8 @@
// KIND, either express or implied.
use crate::common::*;
-#[cfg(not(integer128))]
-use crate::d2s_intrinsics::*;
-pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 13] = [
+pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 15] = [
(1, 2305843009213693952),
(5955668970331000884, 1784059615882449851),
(8982663654677661702, 1380349269358112757),
@@ -36,6 +34,8 @@ pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 13] = [
(12533209867169019542, 1418129833677084982),
(5577825024675947042, 2194449627517475473),
(11006974540203867551, 1697873161311732311),
+ (10313493231639821582, 1313665730009899186),
+ (12701016819766672773, 2032799256770390445),
];
pub static POW5_INV_OFFSETS: [u32; 19] = [
@@ -96,7 +96,6 @@ pub static DOUBLE_POW5_TABLE: [u64; 26] = [
];
// Computes 5^i in the form required by Ryū.
-#[cfg(integer128)]
#[cfg_attr(feature = "no-panic", inline)]
pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
let base = i / DOUBLE_POW5_TABLE.len() as u32;
@@ -112,7 +111,7 @@ pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
let b0 = m as u128 * mul.0 as u128;
let b2 = m as u128 * mul.1 as u128;
let delta = pow5bits(i as i32) - pow5bits(base2 as i32);
- debug_assert!(base < POW5_OFFSETS.len() as u32);
+ debug_assert!(i / 16 < POW5_OFFSETS.len() as u32);
let shifted_sum = (b0 >> delta)
+ (b2 << (64 - delta))
+ ((*POW5_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u128;
@@ -120,7 +119,6 @@ pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
}
// Computes 5^-i in the form required by Ryū.
-#[cfg(integer128)]
#[cfg_attr(feature = "no-panic", inline)]
pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) {
let base = (i + DOUBLE_POW5_TABLE.len() as u32 - 1) / DOUBLE_POW5_TABLE.len() as u32;
@@ -142,64 +140,3 @@ pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) {
+ ((*POW5_INV_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u128;
(shifted_sum as u64, (shifted_sum >> 64) as u64)
}
-
-// Computes 5^i in the form required by Ryū, and stores it in the given pointer.
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
- let base = i / DOUBLE_POW5_TABLE.len() as u32;
- let base2 = base * DOUBLE_POW5_TABLE.len() as u32;
- let offset = i - base2;
- debug_assert!(base < DOUBLE_POW5_SPLIT2.len() as u32);
- let mul = *DOUBLE_POW5_SPLIT2.get_unchecked(base as usize);
- if offset == 0 {
- return mul;
- }
- debug_assert!(offset < DOUBLE_POW5_TABLE.len() as u32);
- let m = *DOUBLE_POW5_TABLE.get_unchecked(offset as usize);
- let (low1, mut high1) = umul128(m, mul.1);
- let (low0, high0) = umul128(m, mul.0);
- let sum = high0 + low1;
- if sum < high0 {
- high1 += 1; // overflow into high1
- }
- // high1 | sum | low0
- let delta = pow5bits(i as i32) - pow5bits(base2 as i32);
- debug_assert!(base < POW5_OFFSETS.len() as u32);
- (
- shiftright128(low0, sum, delta as u32)
- + ((*POW5_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u64,
- shiftright128(sum, high1, delta as u32),
- )
-}
-
-// Computes 5^-i in the form required by Ryū, and stores it in the given pointer.
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) {
- let base = (i + DOUBLE_POW5_TABLE.len() as u32 - 1) / DOUBLE_POW5_TABLE.len() as u32;
- let base2 = base * DOUBLE_POW5_TABLE.len() as u32;
- let offset = base2 - i;
- debug_assert!(base < DOUBLE_POW5_INV_SPLIT2.len() as u32);
- let mul = *DOUBLE_POW5_INV_SPLIT2.get_unchecked(base as usize); // 1/5^base2
- if offset == 0 {
- return mul;
- }
- debug_assert!(offset < DOUBLE_POW5_TABLE.len() as u32);
- let m = *DOUBLE_POW5_TABLE.get_unchecked(offset as usize);
- let (low1, mut high1) = umul128(m, mul.1);
- let (low0, high0) = umul128(m, mul.0 - 1);
- let sum = high0 + low1;
- if sum < high0 {
- high1 += 1; // overflow into high1
- }
- // high1 | sum | low0
- let delta = pow5bits(base2 as i32) - pow5bits(i as i32);
- debug_assert!(base < POW5_INV_OFFSETS.len() as u32);
- (
- shiftright128(low0, sum, delta as u32)
- + 1
- + ((*POW5_INV_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u64,
- shiftright128(sum, high1, delta as u32),
- )
-}
diff --git a/src/lib.rs b/src/lib.rs
index db6ee16..0177efa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,7 +29,9 @@
//! }
//! ```
//!
-//! ## Performance
+//! ## Performance (lower is better)
+//!
+//! ![performance](https://raw.githubusercontent.com/dtolnay/ryu/master/performance.png)
//!
//! You can run upstream's benchmarks with:
//!
@@ -62,20 +64,10 @@
//! $ cargo bench
//! ```
//!
-//! The benchmark shows Ryū approximately 4-10x faster than the standard library
+//! The benchmark shows Ryū approximately 2-5x faster than the standard library
//! across a range of f32 and f64 inputs. Measurements are in nanoseconds per
//! iteration; smaller is better.
//!
-//! | type=f32 | 0.0 | 0.1234 | 2.718281828459045 | f32::MAX |
-//! |:--------:|:----:|:------:|:-----------------:|:--------:|
-//! | RYU | 3ns | 28ns | 23ns | 22ns |
-//! | STD | 40ns | 106ns | 128ns | 110ns |
-//!
-//! | type=f64 | 0.0 | 0.1234 | 2.718281828459045 | f64::MAX |
-//! |:--------:|:----:|:------:|:-----------------:|:--------:|
-//! | RYU | 3ns | 50ns | 35ns | 32ns |
-//! | STD | 39ns | 105ns | 128ns | 202ns |
-//!
//! ## Formatting
//!
//! This library tends to produce more human-readable output than the standard
@@ -89,11 +81,25 @@
//! notation.
#![no_std]
-#![doc(html_root_url = "https://docs.rs/ryu/1.0.5")]
-#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))]
-#![cfg_attr(
- feature = "cargo-clippy",
- allow(cast_lossless, many_single_char_names, unreadable_literal,)
+#![doc(html_root_url = "https://docs.rs/ryu/1.0.9")]
+#![allow(
+ clippy::cast_lossless,
+ clippy::cast_possible_truncation,
+ clippy::cast_possible_wrap,
+ clippy::cast_sign_loss,
+ clippy::checked_conversions,
+ clippy::doc_markdown,
+ clippy::expl_impl_clone_on_copy,
+ clippy::if_not_else,
+ clippy::many_single_char_names,
+ clippy::missing_panics_doc,
+ clippy::module_name_repetitions,
+ clippy::must_use_candidate,
+ clippy::similar_names,
+ clippy::too_many_lines,
+ clippy::unreadable_literal,
+ clippy::unseparated_literal_suffix,
+ clippy::wildcard_imports
)]
mod buffer;
diff --git a/src/pretty/exponent.rs b/src/pretty/exponent.rs
index 84053d5..b72add5 100644
--- a/src/pretty/exponent.rs
+++ b/src/pretty/exponent.rs
@@ -14,11 +14,11 @@ pub unsafe fn write_exponent3(mut k: isize, mut result: *mut u8) -> usize {
if k >= 100 {
*result = b'0' + (k / 100) as u8;
k %= 100;
- let d = DIGIT_TABLE.get_unchecked(k as usize * 2);
+ let d = DIGIT_TABLE.as_ptr().offset(k * 2);
ptr::copy_nonoverlapping(d, result.offset(1), 2);
sign as usize + 3
} else if k >= 10 {
- let d = DIGIT_TABLE.get_unchecked(k as usize * 2);
+ let d = DIGIT_TABLE.as_ptr().offset(k * 2);
ptr::copy_nonoverlapping(d, result, 2);
sign as usize + 2
} else {
@@ -38,7 +38,7 @@ pub unsafe fn write_exponent2(mut k: isize, mut result: *mut u8) -> usize {
debug_assert!(k < 100);
if k >= 10 {
- let d = DIGIT_TABLE.get_unchecked(k as usize * 2);
+ let d = DIGIT_TABLE.as_ptr().offset(k * 2);
ptr::copy_nonoverlapping(d, result, 2);
sign as usize + 2
} else {
diff --git a/src/pretty/mantissa.rs b/src/pretty/mantissa.rs
index e5fc202..150c79c 100644
--- a/src/pretty/mantissa.rs
+++ b/src/pretty/mantissa.rs
@@ -15,10 +15,26 @@ pub unsafe fn write_mantissa_long(mut output: u64, mut result: *mut u8) {
let c1 = (c / 100) << 1;
let d0 = (d % 100) << 1;
let d1 = (d / 100) << 1;
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize), result.offset(-2), 2);
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize), result.offset(-4), 2);
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(d0 as usize), result.offset(-6), 2);
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(d1 as usize), result.offset(-8), 2);
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(c0 as isize),
+ result.offset(-2),
+ 2,
+ );
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(c1 as isize),
+ result.offset(-4),
+ 2,
+ );
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(d0 as isize),
+ result.offset(-6),
+ 2,
+ );
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(d1 as isize),
+ result.offset(-8),
+ 2,
+ );
result = result.offset(-8);
}
write_mantissa(output as u32, result);
@@ -31,19 +47,35 @@ pub unsafe fn write_mantissa(mut output: u32, mut result: *mut u8) {
output /= 10_000;
let c0 = (c % 100) << 1;
let c1 = (c / 100) << 1;
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize), result.offset(-2), 2);
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize), result.offset(-4), 2);
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(c0 as isize),
+ result.offset(-2),
+ 2,
+ );
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(c1 as isize),
+ result.offset(-4),
+ 2,
+ );
result = result.offset(-4);
}
if output >= 100 {
let c = ((output % 100) << 1) as u32;
output /= 100;
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize), result.offset(-2), 2);
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(c as isize),
+ result.offset(-2),
+ 2,
+ );
result = result.offset(-2);
}
if output >= 10 {
let c = (output << 1) as u32;
- ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize), result.offset(-2), 2);
+ ptr::copy_nonoverlapping(
+ DIGIT_TABLE.as_ptr().offset(c as isize),
+ result.offset(-2),
+ 2,
+ );
} else {
*result.offset(-1) = b'0' + output as u8;
}
diff --git a/src/pretty/mod.rs b/src/pretty/mod.rs
index a82692d..b196a11 100644
--- a/src/pretty/mod.rs
+++ b/src/pretty/mod.rs
@@ -6,7 +6,7 @@ use self::mantissa::*;
use crate::common;
use crate::d2s::{self, *};
use crate::f2s::*;
-use core::{mem, ptr};
+use core::ptr;
#[cfg(feature = "no-panic")]
use no_panic::no_panic;
@@ -50,7 +50,7 @@ use no_panic::no_panic;
#[must_use]
#[cfg_attr(feature = "no-panic", no_panic)]
pub unsafe fn format64(f: f64, result: *mut u8) -> usize {
- let bits = mem::transmute::<f64, u64>(f);
+ let bits = f.to_bits();
let sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1);
let ieee_exponent =
@@ -157,7 +157,7 @@ pub unsafe fn format64(f: f64, result: *mut u8) -> usize {
#[must_use]
#[cfg_attr(feature = "no-panic", no_panic)]
pub unsafe fn format32(f: f32, result: *mut u8) -> usize {
- let bits = mem::transmute::<f32, u32>(f);
+ let bits = f.to_bits();
let sign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
let ieee_mantissa = bits & ((1u32 << FLOAT_MANTISSA_BITS) - 1);
let ieee_exponent =
diff --git a/src/s2d.rs b/src/s2d.rs
index 3d3808d..152ca97 100644
--- a/src/s2d.rs
+++ b/src/s2d.rs
@@ -203,12 +203,13 @@ pub fn s2d(buffer: &[u8]) -> Result<f64, Error> {
let round_up = last_removed_bit != 0 && (!trailing_zeros || ((m2 >> shift) & 1) != 0);
let mut ieee_m2 = (m2 >> shift).wrapping_add(round_up as u64);
- if ieee_m2 == (1_u64 << (d2s::DOUBLE_MANTISSA_BITS + 1)) {
+ debug_assert!(ieee_m2 <= 1_u64 << (d2s::DOUBLE_MANTISSA_BITS + 1));
+ ieee_m2 &= (1_u64 << d2s::DOUBLE_MANTISSA_BITS) - 1;
+ if ieee_m2 == 0 && round_up {
// Due to how the IEEE represents +/-Infinity, we don't need to check
// for overflow here.
ieee_e2 += 1;
}
- ieee_m2 &= (1_u64 << d2s::DOUBLE_MANTISSA_BITS) - 1;
let ieee = ((((signed_m as u64) << d2s::DOUBLE_EXPONENT_BITS) | ieee_e2 as u64)
<< d2s::DOUBLE_MANTISSA_BITS)
| ieee_m2;
diff --git a/src/s2f.rs b/src/s2f.rs
index d9e0744..37c5417 100644
--- a/src/s2f.rs
+++ b/src/s2f.rs
@@ -153,13 +153,29 @@ pub fn s2f(buffer: &[u8]) -> Result<f32, Error> {
.wrapping_add(e10 as u32)
.wrapping_sub(ceil_log2_pow5(-e10) as u32)
.wrapping_sub(f2s::FLOAT_MANTISSA_BITS + 1) as i32;
+
+ // We now compute [m10 * 10^e10 / 2^e2] = [m10 / (5^(-e10) 2^(e2-e10))].
let j = e2
.wrapping_sub(e10)
.wrapping_add(ceil_log2_pow5(-e10))
.wrapping_sub(1)
.wrapping_add(f2s::FLOAT_POW5_INV_BITCOUNT);
m2 = mul_pow5_inv_div_pow2(m10, -e10 as u32, j);
- trailing_zeros = multiple_of_power_of_5_32(m10, -e10 as u32);
+
+ // We also compute if the result is exact, i.e.,
+ // [m10 / (5^(-e10) 2^(e2-e10))] == m10 / (5^(-e10) 2^(e2-e10))
+ //
+ // If e2-e10 >= 0, we need to check whether (5^(-e10) 2^(e2-e10))
+ // divides m10, which is the case iff pow5(m10) >= -e10 AND pow2(m10) >=
+ // e2-e10.
+ //
+ // If e2-e10 < 0, we have actually computed [m10 * 2^(e10-e2) /
+ // 5^(-e10)] above, and we need to check whether 5^(-e10) divides (m10 *
+ // 2^(e10-e2)), which is the case iff pow5(m10 * 2^(e10-e2)) = pow5(m10)
+ // >= -e10.
+ trailing_zeros = (e2 < e10
+ || (e2 - e10 < 32 && multiple_of_power_of_2_32(m10, (e2 - e10) as u32)))
+ && multiple_of_power_of_5_32(m10, -e10 as u32);
}
// Compute the final IEEE exponent.
@@ -194,12 +210,16 @@ pub fn s2f(buffer: &[u8]) -> Result<f32, Error> {
let round_up = last_removed_bit != 0 && (!trailing_zeros || ((m2 >> shift) & 1) != 0);
let mut ieee_m2 = (m2 >> shift).wrapping_add(round_up as u32);
- if ieee_m2 == (1_u32 << (f2s::FLOAT_MANTISSA_BITS + 1)) {
+ debug_assert!(ieee_m2 <= 1_u32 << (f2s::FLOAT_MANTISSA_BITS + 1));
+ ieee_m2 &= (1_u32 << f2s::FLOAT_MANTISSA_BITS) - 1;
+ if ieee_m2 == 0 && round_up {
+ // Rounding up may overflow the mantissa.
+ // In this case we move a trailing zero of the mantissa into the
+ // exponent.
// Due to how the IEEE represents +/-Infinity, we don't need to check
// for overflow here.
ieee_e2 += 1;
}
- ieee_m2 &= (1_u32 << f2s::FLOAT_MANTISSA_BITS) - 1;
let ieee = ((((signed_m as u32) << f2s::FLOAT_EXPONENT_BITS) | ieee_e2 as u32)
<< f2s::FLOAT_MANTISSA_BITS)
| ieee_m2;