10 files changed, 116 insertions, 256 deletions
diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs
index df21fe0..2ccd9b0 100644
--- a/src/buffer/mod.rs
+++ b/src/buffer/mod.rs
@@ -1,13 +1,12 @@
 use crate::raw;
-#[cfg(maybe_uninit)]
 use core::mem::MaybeUninit;
-use core::{mem, slice, str};
+use core::{slice, str};
 #[cfg(feature = "no-panic")]
 use no_panic::no_panic;
 
-const NAN: &'static str = "NaN";
-const INFINITY: &'static str = "inf";
-const NEG_INFINITY: &'static str = "-inf";
+const NAN: &str = "NaN";
+const INFINITY: &str = "inf";
+const NEG_INFINITY: &str = "-inf";
 
 /// Safe API for formatting floating point numbers to text.
 ///
@@ -19,10 +18,7 @@ const NEG_INFINITY: &'static str = "-inf";
 /// assert_eq!(printed, "1.234");
 /// ```
 pub struct Buffer {
-    #[cfg(maybe_uninit)]
     bytes: [MaybeUninit<u8>; 24],
-    #[cfg(not(maybe_uninit))]
-    bytes: [u8; 24],
 }
 
 impl Buffer {
@@ -31,14 +27,8 @@ impl Buffer {
     #[inline]
     #[cfg_attr(feature = "no-panic", no_panic)]
     pub fn new() -> Self {
-        // assume_init is safe here, since this is an array of MaybeUninit, which does not need
-        // to be initialized.
-        #[cfg(maybe_uninit)]
         let bytes = [MaybeUninit::<u8>::uninit(); 24];
-        #[cfg(not(maybe_uninit))]
-        let bytes = unsafe { mem::uninitialized() };
-
-        Buffer { bytes: bytes }
+        Buffer { bytes }
     }
 
     /// Print a floating point number into this buffer and return a reference to
@@ -125,7 +115,7 @@ impl Sealed for f32 {
     #[inline]
     fn is_nonfinite(self) -> bool {
         const EXP_MASK: u32 = 0x7f800000;
-        let bits = unsafe { mem::transmute::<f32, u32>(self) };
+        let bits = self.to_bits();
         bits & EXP_MASK == EXP_MASK
     }
 
@@ -134,7 +124,7 @@ impl Sealed for f32 {
     fn format_nonfinite(self) -> &'static str {
         const MANTISSA_MASK: u32 = 0x007fffff;
         const SIGN_MASK: u32 = 0x80000000;
-        let bits = unsafe { mem::transmute::<f32, u32>(self) };
+        let bits = self.to_bits();
         if bits & MANTISSA_MASK != 0 {
             NAN
         } else if bits & SIGN_MASK != 0 {
@@ -154,7 +144,7 @@ impl Sealed for f64 {
     #[inline]
     fn is_nonfinite(self) -> bool {
         const EXP_MASK: u64 = 0x7ff0000000000000;
-        let bits = unsafe { mem::transmute::<f64, u64>(self) };
+        let bits = self.to_bits();
         bits & EXP_MASK == EXP_MASK
     }
 
@@ -163,7 +153,7 @@ impl Sealed for f64 {
     fn format_nonfinite(self) -> &'static str {
         const MANTISSA_MASK: u64 = 0x000fffffffffffff;
         const SIGN_MASK: u64 = 0x8000000000000000;
-        let bits = unsafe { mem::transmute::<f64, u64>(self) };
+        let bits = self.to_bits();
         if bits & MANTISSA_MASK != 0 {
             NAN
         } else if bits & SIGN_MASK != 0 {
diff --git a/src/d2s.rs b/src/d2s.rs
index 862fd5f..392577a 100644
--- a/src/d2s.rs
+++ b/src/d2s.rs
@@ -24,9 +24,6 @@ pub use crate::d2s_full_table::*;
 use crate::d2s_intrinsics::*;
 #[cfg(feature = "small")]
 pub use crate::d2s_small_table::*;
-#[cfg(not(maybe_uninit))]
-use core::mem;
-#[cfg(maybe_uninit)]
 use core::mem::MaybeUninit;
 
 pub const DOUBLE_MANTISSA_BITS: u32 = 52;
@@ -117,14 +114,7 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
     let mut vr: u64;
     let mut vp: u64;
     let mut vm: u64;
-    #[cfg(not(maybe_uninit))]
-    {
-        vp = unsafe { mem::uninitialized() };
-        vm = unsafe { mem::uninitialized() };
-    }
-    #[cfg(maybe_uninit)]
     let mut vp_uninit: MaybeUninit<u64> = MaybeUninit::uninit();
-    #[cfg(maybe_uninit)]
     let mut vm_uninit: MaybeUninit<u64> = MaybeUninit::uninit();
     let e10: i32;
     let mut vm_is_trailing_zeros = false;
@@ -147,30 +137,13 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
                     DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize)
                 },
                 i as u32,
-                #[cfg(maybe_uninit)]
-                {
-                    vp_uninit.as_mut_ptr()
-                },
-                #[cfg(not(maybe_uninit))]
-                {
-                    &mut vp
-                },
-                #[cfg(maybe_uninit)]
-                {
-                    vm_uninit.as_mut_ptr()
-                },
-                #[cfg(not(maybe_uninit))]
-                {
-                    &mut vm
-                },
+                vp_uninit.as_mut_ptr(),
+                vm_uninit.as_mut_ptr(),
                 mm_shift,
             )
         };
-        #[cfg(maybe_uninit)]
-        {
-            vp = unsafe { vp_uninit.assume_init() };
-            vm = unsafe { vm_uninit.assume_init() };
-        }
+        vp = unsafe { vp_uninit.assume_init() };
+        vm = unsafe { vm_uninit.assume_init() };
         if q <= 21 {
             // This should use q <= 22, but I think 21 is also safe. Smaller values
             // may still be safe, but it's more difficult to reason about them.
@@ -206,30 +179,13 @@ pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
                     DOUBLE_POW5_SPLIT.get_unchecked(i as usize)
                 },
                 j as u32,
-                #[cfg(maybe_uninit)]
-                {
-                    vp_uninit.as_mut_ptr()
-                },
-                #[cfg(not(maybe_uninit))]
-                {
-                    &mut vp
-                },
-                #[cfg(maybe_uninit)]
-                {
-                    vm_uninit.as_mut_ptr()
-                },
-                #[cfg(not(maybe_uninit))]
-                {
-                    &mut vm
-                },
+                vp_uninit.as_mut_ptr(),
+                vm_uninit.as_mut_ptr(),
                 mm_shift,
             )
         };
-        #[cfg(maybe_uninit)]
-        {
-            vp = unsafe { vp_uninit.assume_init() };
-            vm = unsafe { vm_uninit.assume_init() };
-        }
+        vp = unsafe { vp_uninit.assume_init() };
+        vm = unsafe { vm_uninit.assume_init() };
         if q <= 1 {
             // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
             // mv = 4 * m2, so it always has at least two trailing 0 bits.
diff --git a/src/d2s_intrinsics.rs b/src/d2s_intrinsics.rs
index 918ccab..f244a4d 100644
--- a/src/d2s_intrinsics.rs
+++ b/src/d2s_intrinsics.rs
@@ -20,46 +20,6 @@
 
 use core::ptr;
 
-// Returns (lo, hi).
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub fn umul128(a: u64, b: u64) -> (u64, u64) {
-    let a_lo = a as u32;
-    let a_hi = (a >> 32) as u32;
-    let b_lo = b as u32;
-    let b_hi = (b >> 32) as u32;
-
-    let b00 = a_lo as u64 * b_lo as u64;
-    let b01 = a_lo as u64 * b_hi as u64;
-    let b10 = a_hi as u64 * b_lo as u64;
-    let b11 = a_hi as u64 * b_hi as u64;
-
-    let b00_lo = b00 as u32;
-    let b00_hi = (b00 >> 32) as u32;
-
-    let mid1 = b10 + b00_hi as u64;
-    let mid1_lo = mid1 as u32;
-    let mid1_hi = (mid1 >> 32) as u32;
-
-    let mid2 = b01 + mid1_lo as u64;
-    let mid2_lo = mid2 as u32;
-    let mid2_hi = (mid2 >> 32) as u32;
-
-    let p_hi = b11 + mid1_hi as u64 + mid2_hi as u64;
-    let p_lo = ((mid2_lo as u64) << 32) | b00_lo as u64;
-
-    (p_lo, p_hi)
-}
-
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub fn shiftright128(lo: u64, hi: u64, dist: u32) -> u64 {
-    // We don't need to handle the case dist >= 64 here (see above).
-    debug_assert!(dist > 0);
-    debug_assert!(dist < 64);
-    (hi << (64 - dist)) | (lo >> dist)
-}
-
 #[cfg_attr(feature = "no-panic", inline)]
 pub fn div5(x: u64) -> u64 {
     x / 5
@@ -107,7 +67,6 @@ pub fn multiple_of_power_of_2(value: u64, p: u32) -> bool {
     (value & ((1u64 << p) - 1)) == 0
 }
 
-#[cfg(integer128)]
 #[cfg_attr(feature = "no-panic", inline)]
 pub fn mul_shift_64(m: u64, mul: &(u64, u64), j: u32) -> u64 {
     let b0 = m as u128 * mul.0 as u128;
@@ -115,7 +74,6 @@ pub fn mul_shift_64(m: u64, mul: &(u64, u64), j: u32) -> u64 {
     (((b0 >> 64) + b2) >> (j - 64)) as u64
 }
 
-#[cfg(integer128)]
 #[cfg_attr(feature = "no-panic", inline)]
 pub unsafe fn mul_shift_all_64(
     m: u64,
@@ -129,43 +87,3 @@ pub unsafe fn mul_shift_all_64(
     ptr::write(vm, mul_shift_64(4 * m - 1 - mm_shift as u64, mul, j));
     mul_shift_64(4 * m, mul, j)
 }
-
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub unsafe fn mul_shift_all_64(
-    mut m: u64,
-    mul: &(u64, u64),
-    j: u32,
-    vp: *mut u64,
-    vm: *mut u64,
-    mm_shift: u32,
-) -> u64 {
-    m <<= 1;
-    // m is maximum 55 bits
-    let (lo, tmp) = umul128(m, mul.0);
-    let (mut mid, mut hi) = umul128(m, mul.1);
-    mid = mid.wrapping_add(tmp);
-    hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi
-
-    let lo2 = lo.wrapping_add(mul.0);
-    let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64);
-    let hi2 = hi.wrapping_add((mid2 < mid) as u64);
-    ptr::write(vp, shiftright128(mid2, hi2, j - 64 - 1));
-
-    if mm_shift == 1 {
-        let lo3 = lo.wrapping_sub(mul.0);
-        let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64);
-        let hi3 = hi.wrapping_sub((mid3 > mid) as u64);
-        ptr::write(vm, shiftright128(mid3, hi3, j - 64 - 1));
-    } else {
-        let lo3 = lo + lo;
-        let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64);
-        let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64);
-        let lo4 = lo3.wrapping_sub(mul.0);
-        let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64);
-        let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64);
-        ptr::write(vm, shiftright128(mid4, hi4, j - 64));
-    }
-
-    shiftright128(mid, hi, j - 64 - 1)
-}
diff --git a/src/d2s_small_table.rs b/src/d2s_small_table.rs
index 08519a2..262fc04 100644
--- a/src/d2s_small_table.rs
+++ b/src/d2s_small_table.rs
@@ -19,10 +19,8 @@
 // KIND, either express or implied.
 
 use crate::common::*;
-#[cfg(not(integer128))]
-use crate::d2s_intrinsics::*;
 
-pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 13] = [
+pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 15] = [
     (1, 2305843009213693952),
     (5955668970331000884, 1784059615882449851),
     (8982663654677661702, 1380349269358112757),
@@ -36,6 +34,8 @@ pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 13] = [
     (12533209867169019542, 1418129833677084982),
     (5577825024675947042, 2194449627517475473),
     (11006974540203867551, 1697873161311732311),
+    (10313493231639821582, 1313665730009899186),
+    (12701016819766672773, 2032799256770390445),
 ];
 
 pub static POW5_INV_OFFSETS: [u32; 19] = [
@@ -96,7 +96,6 @@ pub static DOUBLE_POW5_TABLE: [u64; 26] = [
 ];
 
 // Computes 5^i in the form required by Ryū.
-#[cfg(integer128)]
 #[cfg_attr(feature = "no-panic", inline)]
 pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
     let base = i / DOUBLE_POW5_TABLE.len() as u32;
@@ -112,7 +111,7 @@ pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
     let b0 = m as u128 * mul.0 as u128;
     let b2 = m as u128 * mul.1 as u128;
     let delta = pow5bits(i as i32) - pow5bits(base2 as i32);
-    debug_assert!(base < POW5_OFFSETS.len() as u32);
+    debug_assert!(i / 16 < POW5_OFFSETS.len() as u32);
     let shifted_sum = (b0 >> delta)
         + (b2 << (64 - delta))
         + ((*POW5_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u128;
@@ -120,7 +119,6 @@ pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
 }
 
 // Computes 5^-i in the form required by Ryū.
-#[cfg(integer128)]
 #[cfg_attr(feature = "no-panic", inline)]
 pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) {
     let base = (i + DOUBLE_POW5_TABLE.len() as u32 - 1) / DOUBLE_POW5_TABLE.len() as u32;
@@ -142,64 +140,3 @@ pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) {
         + ((*POW5_INV_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u128;
     (shifted_sum as u64, (shifted_sum >> 64) as u64)
 }
-
-// Computes 5^i in the form required by Ryū, and stores it in the given pointer.
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub unsafe fn compute_pow5(i: u32) -> (u64, u64) {
-    let base = i / DOUBLE_POW5_TABLE.len() as u32;
-    let base2 = base * DOUBLE_POW5_TABLE.len() as u32;
-    let offset = i - base2;
-    debug_assert!(base < DOUBLE_POW5_SPLIT2.len() as u32);
-    let mul = *DOUBLE_POW5_SPLIT2.get_unchecked(base as usize);
-    if offset == 0 {
-        return mul;
-    }
-    debug_assert!(offset < DOUBLE_POW5_TABLE.len() as u32);
-    let m = *DOUBLE_POW5_TABLE.get_unchecked(offset as usize);
-    let (low1, mut high1) = umul128(m, mul.1);
-    let (low0, high0) = umul128(m, mul.0);
-    let sum = high0 + low1;
-    if sum < high0 {
-        high1 += 1; // overflow into high1
-    }
-    // high1 | sum | low0
-    let delta = pow5bits(i as i32) - pow5bits(base2 as i32);
-    debug_assert!(base < POW5_OFFSETS.len() as u32);
-    (
-        shiftright128(low0, sum, delta as u32)
-            + ((*POW5_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u64,
-        shiftright128(sum, high1, delta as u32),
-    )
-}
-
-// Computes 5^-i in the form required by Ryū, and stores it in the given pointer.
-#[cfg(not(integer128))]
-#[cfg_attr(feature = "no-panic", inline)]
-pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64) {
-    let base = (i + DOUBLE_POW5_TABLE.len() as u32 - 1) / DOUBLE_POW5_TABLE.len() as u32;
-    let base2 = base * DOUBLE_POW5_TABLE.len() as u32;
-    let offset = base2 - i;
-    debug_assert!(base < DOUBLE_POW5_INV_SPLIT2.len() as u32);
-    let mul = *DOUBLE_POW5_INV_SPLIT2.get_unchecked(base as usize); // 1/5^base2
-    if offset == 0 {
-        return mul;
-    }
-    debug_assert!(offset < DOUBLE_POW5_TABLE.len() as u32);
-    let m = *DOUBLE_POW5_TABLE.get_unchecked(offset as usize);
-    let (low1, mut high1) = umul128(m, mul.1);
-    let (low0, high0) = umul128(m, mul.0 - 1);
-    let sum = high0 + low1;
-    if sum < high0 {
-        high1 += 1; // overflow into high1
-    }
-    // high1 | sum | low0
-    let delta = pow5bits(base2 as i32) - pow5bits(i as i32);
-    debug_assert!(base < POW5_INV_OFFSETS.len() as u32);
-    (
-        shiftright128(low0, sum, delta as u32)
-            + 1
-            + ((*POW5_INV_OFFSETS.get_unchecked((i / 16) as usize) >> ((i % 16) << 1)) & 3) as u64,
-        shiftright128(sum, high1, delta as u32),
-    )
-}
diff --git a/src/lib.rs b/src/lib.rs
index db6ee16..0177efa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,7 +29,9 @@
 //! }
 //! ```
 //!
-//! ## Performance
+//! ## Performance (lower is better)
+//!
+//! ![performance](https://raw.githubusercontent.com/dtolnay/ryu/master/performance.png)
 //!
 //! You can run upstream's benchmarks with:
 //!
@@ -62,20 +64,10 @@
 //! $ cargo bench
 //! ```
 //!
-//! The benchmark shows Ryū approximately 4-10x faster than the standard library
+//! The benchmark shows Ryū approximately 2-5x faster than the standard library
 //! across a range of f32 and f64 inputs. Measurements are in nanoseconds per
 //! iteration; smaller is better.
 //!
-//! | type=f32 | 0.0  | 0.1234 | 2.718281828459045 | f32::MAX |
-//! |:--------:|:----:|:------:|:-----------------:|:--------:|
-//! | RYU      | 3ns  | 28ns   | 23ns              | 22ns     |
-//! | STD      | 40ns | 106ns  | 128ns             | 110ns    |
-//!
-//! | type=f64 | 0.0  | 0.1234 | 2.718281828459045 | f64::MAX |
-//! |:--------:|:----:|:------:|:-----------------:|:--------:|
-//! | RYU      | 3ns  | 50ns   | 35ns              | 32ns     |
-//! | STD      | 39ns | 105ns  | 128ns             | 202ns    |
-//!
 //! ## Formatting
 //!
 //! This library tends to produce more human-readable output than the standard
@@ -89,11 +81,25 @@
 //! notation.
 
 #![no_std]
-#![doc(html_root_url = "https://docs.rs/ryu/1.0.5")]
-#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))]
-#![cfg_attr(
-    feature = "cargo-clippy",
-    allow(cast_lossless, many_single_char_names, unreadable_literal,)
+#![doc(html_root_url = "https://docs.rs/ryu/1.0.9")]
+#![allow(
+    clippy::cast_lossless,
+    clippy::cast_possible_truncation,
+    clippy::cast_possible_wrap,
+    clippy::cast_sign_loss,
+    clippy::checked_conversions,
+    clippy::doc_markdown,
+    clippy::expl_impl_clone_on_copy,
+    clippy::if_not_else,
+    clippy::many_single_char_names,
+    clippy::missing_panics_doc,
+    clippy::module_name_repetitions,
+    clippy::must_use_candidate,
+    clippy::similar_names,
+    clippy::too_many_lines,
+    clippy::unreadable_literal,
+    clippy::unseparated_literal_suffix,
+    clippy::wildcard_imports
 )]
 
 mod buffer;
diff --git a/src/pretty/exponent.rs b/src/pretty/exponent.rs
index 84053d5..b72add5 100644
--- a/src/pretty/exponent.rs
+++ b/src/pretty/exponent.rs
@@ -14,11 +14,11 @@ pub unsafe fn write_exponent3(mut k: isize, mut result: *mut u8) -> usize {
     if k >= 100 {
         *result = b'0' + (k / 100) as u8;
         k %= 100;
-        let d = DIGIT_TABLE.get_unchecked(k as usize * 2);
+        let d = DIGIT_TABLE.as_ptr().offset(k * 2);
         ptr::copy_nonoverlapping(d, result.offset(1), 2);
         sign as usize + 3
     } else if k >= 10 {
-        let d = DIGIT_TABLE.get_unchecked(k as usize * 2);
+        let d = DIGIT_TABLE.as_ptr().offset(k * 2);
         ptr::copy_nonoverlapping(d, result, 2);
         sign as usize + 2
     } else {
@@ -38,7 +38,7 @@ pub unsafe fn write_exponent2(mut k: isize, mut result: *mut u8) -> usize {
 
     debug_assert!(k < 100);
     if k >= 10 {
-        let d = DIGIT_TABLE.get_unchecked(k as usize * 2);
+        let d = DIGIT_TABLE.as_ptr().offset(k * 2);
         ptr::copy_nonoverlapping(d, result, 2);
         sign as usize + 2
     } else {
diff --git a/src/pretty/mantissa.rs b/src/pretty/mantissa.rs
index e5fc202..150c79c 100644
--- a/src/pretty/mantissa.rs
+++ b/src/pretty/mantissa.rs
@@ -15,10 +15,26 @@ pub unsafe fn write_mantissa_long(mut output: u64, mut result: *mut u8) {
         let c1 = (c / 100) << 1;
         let d0 = (d % 100) << 1;
         let d1 = (d / 100) << 1;
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize), result.offset(-2), 2);
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize), result.offset(-4), 2);
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(d0 as usize), result.offset(-6), 2);
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(d1 as usize), result.offset(-8), 2);
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(c0 as isize),
+            result.offset(-2),
+            2,
+        );
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(c1 as isize),
+            result.offset(-4),
+            2,
+        );
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(d0 as isize),
+            result.offset(-6),
+            2,
+        );
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(d1 as isize),
+            result.offset(-8),
+            2,
+        );
         result = result.offset(-8);
     }
     write_mantissa(output as u32, result);
@@ -31,19 +47,35 @@ pub unsafe fn write_mantissa(mut output: u32, mut result: *mut u8) {
         output /= 10_000;
         let c0 = (c % 100) << 1;
         let c1 = (c / 100) << 1;
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize), result.offset(-2), 2);
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize), result.offset(-4), 2);
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(c0 as isize),
+            result.offset(-2),
+            2,
+        );
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(c1 as isize),
+            result.offset(-4),
+            2,
+        );
         result = result.offset(-4);
     }
     if output >= 100 {
         let c = ((output % 100) << 1) as u32;
         output /= 100;
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize), result.offset(-2), 2);
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(c as isize),
+            result.offset(-2),
+            2,
+        );
         result = result.offset(-2);
     }
     if output >= 10 {
         let c = (output << 1) as u32;
-        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize), result.offset(-2), 2);
+        ptr::copy_nonoverlapping(
+            DIGIT_TABLE.as_ptr().offset(c as isize),
+            result.offset(-2),
+            2,
+        );
     } else {
         *result.offset(-1) = b'0' + output as u8;
     }
diff --git a/src/pretty/mod.rs b/src/pretty/mod.rs
index a82692d..b196a11 100644
--- a/src/pretty/mod.rs
+++ b/src/pretty/mod.rs
@@ -6,7 +6,7 @@ use self::mantissa::*;
 use crate::common;
 use crate::d2s::{self, *};
 use crate::f2s::*;
-use core::{mem, ptr};
+use core::ptr;
 #[cfg(feature = "no-panic")]
 use no_panic::no_panic;
 
@@ -50,7 +50,7 @@ use no_panic::no_panic;
 #[must_use]
 #[cfg_attr(feature = "no-panic", no_panic)]
 pub unsafe fn format64(f: f64, result: *mut u8) -> usize {
-    let bits = mem::transmute::<f64, u64>(f);
+    let bits = f.to_bits();
     let sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
     let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1);
     let ieee_exponent =
@@ -157,7 +157,7 @@ pub unsafe fn format64(f: f64, result: *mut u8) -> usize {
 #[must_use]
 #[cfg_attr(feature = "no-panic", no_panic)]
 pub unsafe fn format32(f: f32, result: *mut u8) -> usize {
-    let bits = mem::transmute::<f32, u32>(f);
+    let bits = f.to_bits();
     let sign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
     let ieee_mantissa = bits & ((1u32 << FLOAT_MANTISSA_BITS) - 1);
     let ieee_exponent =
diff --git a/src/s2d.rs b/src/s2d.rs
index 3d3808d..152ca97 100644
--- a/src/s2d.rs
+++ b/src/s2d.rs
@@ -203,12 +203,13 @@ pub fn s2d(buffer: &[u8]) -> Result<f64, Error> {
     let round_up = last_removed_bit != 0 && (!trailing_zeros || ((m2 >> shift) & 1) != 0);
 
     let mut ieee_m2 = (m2 >> shift).wrapping_add(round_up as u64);
-    if ieee_m2 == (1_u64 << (d2s::DOUBLE_MANTISSA_BITS + 1)) {
+    debug_assert!(ieee_m2 <= 1_u64 << (d2s::DOUBLE_MANTISSA_BITS + 1));
+    ieee_m2 &= (1_u64 << d2s::DOUBLE_MANTISSA_BITS) - 1;
+    if ieee_m2 == 0 && round_up {
         // Due to how the IEEE represents +/-Infinity, we don't need to check
         // for overflow here.
         ieee_e2 += 1;
     }
-    ieee_m2 &= (1_u64 << d2s::DOUBLE_MANTISSA_BITS) - 1;
     let ieee = ((((signed_m as u64) << d2s::DOUBLE_EXPONENT_BITS) | ieee_e2 as u64)
         << d2s::DOUBLE_MANTISSA_BITS)
         | ieee_m2;
diff --git a/src/s2f.rs b/src/s2f.rs
index d9e0744..37c5417 100644
--- a/src/s2f.rs
+++ b/src/s2f.rs
@@ -153,13 +153,29 @@ pub fn s2f(buffer: &[u8]) -> Result<f32, Error> {
             .wrapping_add(e10 as u32)
             .wrapping_sub(ceil_log2_pow5(-e10) as u32)
             .wrapping_sub(f2s::FLOAT_MANTISSA_BITS + 1) as i32;
+
+        // We now compute [m10 * 10^e10 / 2^e2] = [m10 / (5^(-e10) 2^(e2-e10))].
         let j = e2
             .wrapping_sub(e10)
             .wrapping_add(ceil_log2_pow5(-e10))
             .wrapping_sub(1)
             .wrapping_add(f2s::FLOAT_POW5_INV_BITCOUNT);
         m2 = mul_pow5_inv_div_pow2(m10, -e10 as u32, j);
-        trailing_zeros = multiple_of_power_of_5_32(m10, -e10 as u32);
+
+        // We also compute if the result is exact, i.e.,
+        //   [m10 / (5^(-e10) 2^(e2-e10))] == m10 / (5^(-e10) 2^(e2-e10))
+        //
+        // If e2-e10 >= 0, we need to check whether (5^(-e10) 2^(e2-e10))
+        // divides m10, which is the case iff pow5(m10) >= -e10 AND pow2(m10) >=
+        // e2-e10.
+        //
+        // If e2-e10 < 0, we have actually computed [m10 * 2^(e10-e2) /
+        // 5^(-e10)] above, and we need to check whether 5^(-e10) divides (m10 *
+        // 2^(e10-e2)), which is the case iff pow5(m10 * 2^(e10-e2)) = pow5(m10)
+        // >= -e10.
+        trailing_zeros = (e2 < e10
+            || (e2 - e10 < 32 && multiple_of_power_of_2_32(m10, (e2 - e10) as u32)))
+            && multiple_of_power_of_5_32(m10, -e10 as u32);
     }
 
     // Compute the final IEEE exponent.
@@ -194,12 +210,16 @@ pub fn s2f(buffer: &[u8]) -> Result<f32, Error> {
     let round_up = last_removed_bit != 0 && (!trailing_zeros || ((m2 >> shift) & 1) != 0);
 
     let mut ieee_m2 = (m2 >> shift).wrapping_add(round_up as u32);
-    if ieee_m2 == (1_u32 << (f2s::FLOAT_MANTISSA_BITS + 1)) {
+    debug_assert!(ieee_m2 <= 1_u32 << (f2s::FLOAT_MANTISSA_BITS + 1));
+    ieee_m2 &= (1_u32 << f2s::FLOAT_MANTISSA_BITS) - 1;
+    if ieee_m2 == 0 && round_up {
+        // Rounding up may overflow the mantissa.
+        // In this case we move a trailing zero of the mantissa into the
+        // exponent.
         // Due to how the IEEE represents +/-Infinity, we don't need to check
         // for overflow here.
         ieee_e2 += 1;
     }
-    ieee_m2 &= (1_u32 << f2s::FLOAT_MANTISSA_BITS) - 1;
     let ieee = ((((signed_m as u32) << f2s::FLOAT_EXPONENT_BITS) | ieee_e2 as u32)
         << f2s::FLOAT_MANTISSA_BITS)
         | ieee_m2;