Diffstat (limited to 'src')
-rw-r--r--  src/ascii.rs                                         |  23
-rw-r--r--  src/bstr.rs                                          |  32
-rw-r--r--  src/bstring.rs                                       |  46
-rw-r--r--  src/byteset/mod.rs                                   |   3
-rw-r--r--  src/byteset/scalar.rs                                |  46
-rw-r--r--  src/ext_slice.rs                                     | 666
-rw-r--r--  src/ext_vec.rs                                       | 121
-rw-r--r--  src/impls.rs                                         | 144
-rw-r--r--  src/io.rs                                            |  33
-rw-r--r--  src/lib.rs                                           | 111
-rw-r--r--  src/tests.rs                                         |   2
-rw-r--r--  src/unicode/data/GraphemeBreakTest.txt               |   6
-rw-r--r--  src/unicode/data/SentenceBreakTest.txt               |   6
-rw-r--r--  src/unicode/data/WordBreakTest.txt                   |   6
-rw-r--r--  src/unicode/fsm/grapheme_break_fwd.bigendian.dfa     | Bin 10589 -> 10781 bytes
-rw-r--r--  src/unicode/fsm/grapheme_break_fwd.littleendian.dfa  | Bin 10589 -> 10781 bytes
-rw-r--r--  src/unicode/fsm/grapheme_break_fwd.rs                |  26
-rw-r--r--  src/unicode/fsm/grapheme_break_rev.bigendian.dfa     | Bin 53905 -> 55271 bytes
-rw-r--r--  src/unicode/fsm/grapheme_break_rev.littleendian.dfa  | Bin 53905 -> 55271 bytes
-rw-r--r--  src/unicode/fsm/grapheme_break_rev.rs                |  26
-rw-r--r--  src/unicode/fsm/regional_indicator_rev.rs            |  26
-rw-r--r--  src/unicode/fsm/sentence_break_fwd.bigendian.dfa     | Bin 149903 -> 153619 bytes
-rw-r--r--  src/unicode/fsm/sentence_break_fwd.littleendian.dfa  | Bin 149903 -> 153619 bytes
-rw-r--r--  src/unicode/fsm/sentence_break_fwd.rs                |  26
-rw-r--r--  src/unicode/fsm/simple_word_fwd.bigendian.dfa        | Bin 8975 -> 9237 bytes
-rw-r--r--  src/unicode/fsm/simple_word_fwd.littleendian.dfa     | Bin 8975 -> 9237 bytes
-rw-r--r--  src/unicode/fsm/simple_word_fwd.rs                   |  26
-rw-r--r--  src/unicode/fsm/whitespace_anchored_fwd.rs           |  26
-rw-r--r--  src/unicode/fsm/whitespace_anchored_rev.rs           |  26
-rw-r--r--  src/unicode/fsm/word_break_fwd.bigendian.dfa         | Bin 229739 -> 236309 bytes
-rw-r--r--  src/unicode/fsm/word_break_fwd.littleendian.dfa      | Bin 229739 -> 236309 bytes
-rw-r--r--  src/unicode/fsm/word_break_fwd.rs                    |  26
-rw-r--r--  src/unicode/grapheme.rs                              |  44
-rw-r--r--  src/unicode/mod.rs                                   |  10
-rw-r--r--  src/unicode/sentence.rs                              |  15
-rw-r--r--  src/unicode/whitespace.rs                            |   6
-rw-r--r--  src/unicode/word.rs                                  |  26
-rw-r--r--  src/utf8.rs                                          |  29
38 files changed, 977 insertions, 606 deletions
diff --git a/src/ascii.rs b/src/ascii.rs
index bb2b679..259d41f 100644
--- a/src/ascii.rs
+++ b/src/ascii.rs
@@ -23,18 +23,18 @@ use core::mem;
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
// _mm_movemask_epi8.
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const USIZE_BYTES: usize = mem::size_of::<usize>();
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
// This is a mask where the most significant bit of each byte in the usize
// is set. We test this bit to determine whether a character is ASCII or not.
// Namely, a single byte is regarded as an ASCII codepoint if and only if its
// most significant bit is not set.
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK_U64: u64 = 0x8080808080808080;
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
/// Returns the index of the first non ASCII byte in the given slice.
@@ -42,18 +42,18 @@ const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
/// If slice only contains ASCII bytes, then the length of the slice is
/// returned.
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
- #[cfg(not(target_arch = "x86_64"))]
+ #[cfg(any(miri, not(target_arch = "x86_64")))]
{
first_non_ascii_byte_fallback(slice)
}
- #[cfg(target_arch = "x86_64")]
+ #[cfg(all(not(miri), target_arch = "x86_64"))]
{
first_non_ascii_byte_sse2(slice)
}
}
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
let align = USIZE_BYTES - 1;
let start_ptr = slice.as_ptr();
@@ -115,7 +115,7 @@ fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
}
}
-#[cfg(target_arch = "x86_64")]
+#[cfg(all(not(miri), target_arch = "x86_64"))]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
use core::arch::x86_64::*;
@@ -221,7 +221,7 @@ unsafe fn first_non_ascii_byte_slow(
/// bytes is not an ASCII byte.
///
/// The position returned is always in the inclusive range [0, 7].
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_mask(mask: usize) -> usize {
#[cfg(target_endian = "little")]
{
@@ -245,7 +245,7 @@ unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
ptr.offset((amt as isize).wrapping_neg())
}
-#[cfg(any(test, not(target_arch = "x86_64")))]
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
use core::ptr;
@@ -286,6 +286,7 @@ mod tests {
#[test]
#[cfg(target_arch = "x86_64")]
+ #[cfg(not(miri))]
fn positive_sse2_forward() {
for i in 0..517 {
let b = "a".repeat(i).into_bytes();
@@ -294,6 +295,7 @@ mod tests {
}
#[test]
+ #[cfg(not(miri))]
fn negative_fallback_forward() {
for i in 0..517 {
for align in 0..65 {
@@ -315,6 +317,7 @@ mod tests {
#[test]
#[cfg(target_arch = "x86_64")]
+ #[cfg(not(miri))]
fn negative_sse2_forward() {
for i in 0..517 {
for align in 0..65 {
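
The `miri` gating above follows a single pattern: the SSE2 entry point is compiled out whenever Miri is interpreting the code, and every call routes to the scalar fallback, while `test` keeps the fallback compiled on x86_64 so it can still be unit tested there. A minimal sketch of that pattern (not the crate's code; both bodies are stand-ins):

```
// Sketch of the cfg routing used in ascii.rs; the real crate's bodies differ.
pub fn first_non_ascii(slice: &[u8]) -> usize {
    #[cfg(any(miri, not(target_arch = "x86_64")))]
    {
        fallback(slice)
    }
    #[cfg(all(not(miri), target_arch = "x86_64"))]
    {
        sse2(slice)
    }
}

#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn fallback(slice: &[u8]) -> usize {
    // Stand-in body; the real fallback works a machine word at a time.
    slice.iter().position(|&b| !b.is_ascii()).unwrap_or(slice.len())
}

#[cfg(all(not(miri), target_arch = "x86_64"))]
fn sse2(slice: &[u8]) -> usize {
    // Stand-in body; the real routine uses SSE2 intrinsics.
    slice.iter().position(|&b| !b.is_ascii()).unwrap_or(slice.len())
}
```
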
diff --git a/src/bstr.rs b/src/bstr.rs
index 1e3c91b..5036f06 100644
--- a/src/bstr.rs
+++ b/src/bstr.rs
@@ -1,5 +1,8 @@
use core::mem;
+#[cfg(feature = "alloc")]
+use alloc::boxed::Box;
+
/// A wrapper for `&[u8]` that provides convenient string oriented trait impls.
///
/// If you need ownership or a growable byte string buffer, then use
@@ -33,8 +36,31 @@ pub struct BStr {
}
impl BStr {
+ /// Directly creates a `BStr` slice from anything that can be converted
+ /// to a byte slice.
+ ///
+ /// This is very similar to the [`B`](crate::B) function, except this
+ /// returns a `&BStr` instead of a `&[u8]`.
+ ///
+ /// This is a cost-free conversion.
+ ///
+ /// # Example
+ ///
+ /// You can create `BStr`'s from byte arrays, byte slices or even string
+ /// slices:
+ ///
+ /// ```
+ /// use bstr::BStr;
+ ///
+ /// let a = BStr::new(b"abc");
+ /// let b = BStr::new(&b"abc"[..]);
+ /// let c = BStr::new("abc");
+ ///
+ /// assert_eq!(a, b);
+ /// assert_eq!(a, c);
+ /// ```
#[inline]
- pub(crate) fn new<B: ?Sized + AsRef<[u8]>>(bytes: &B) -> &BStr {
+ pub fn new<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a BStr {
BStr::from_bytes(bytes.as_ref())
}
@@ -56,13 +82,13 @@ impl BStr {
}
#[inline]
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
pub(crate) fn from_boxed_bytes(slice: Box<[u8]>) -> Box<BStr> {
unsafe { Box::from_raw(Box::into_raw(slice) as _) }
}
#[inline]
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
pub(crate) fn into_boxed_bytes(slice: Box<BStr>) -> Box<[u8]> {
unsafe { Box::from_raw(Box::into_raw(slice) as _) }
}
diff --git a/src/bstring.rs b/src/bstring.rs
index 30093ba..d144b1d 100644
--- a/src/bstring.rs
+++ b/src/bstring.rs
@@ -1,3 +1,5 @@
+use alloc::vec::Vec;
+
use crate::bstr::BStr;
/// A wrapper for `Vec<u8>` that provides convenient string oriented trait
@@ -38,16 +40,43 @@ use crate::bstr::BStr;
/// region of memory containing the bytes, a length and a capacity.
#[derive(Clone, Hash)]
pub struct BString {
- pub(crate) bytes: Vec<u8>,
+ bytes: Vec<u8>,
}
impl BString {
+ /// Constructs a new `BString` from the given [`Vec`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::BString;
+ ///
+ /// let mut b = BString::new(Vec::with_capacity(10));
+ /// ```
+ ///
+ /// This function is `const`:
+ ///
+ /// ```
+ /// use bstr::BString;
+ ///
+ /// const B: BString = BString::new(vec![]);
+ /// ```
+ #[inline]
+ pub const fn new(bytes: Vec<u8>) -> BString {
+ BString { bytes }
+ }
+
#[inline]
pub(crate) fn as_bytes(&self) -> &[u8] {
&self.bytes
}
#[inline]
+ pub(crate) fn as_bytes_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+
+ #[inline]
pub(crate) fn as_bstr(&self) -> &BStr {
BStr::new(&self.bytes)
}
@@ -56,4 +85,19 @@ impl BString {
pub(crate) fn as_mut_bstr(&mut self) -> &mut BStr {
BStr::new_mut(&mut self.bytes)
}
+
+ #[inline]
+ pub(crate) fn as_vec(&self) -> &Vec<u8> {
+ &self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn as_vec_mut(&mut self) -> &mut Vec<u8> {
+ &mut self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn into_vec(self) -> Vec<u8> {
+ self.bytes
+ }
}
diff --git a/src/byteset/mod.rs b/src/byteset/mod.rs
index 043d309..c6c697c 100644
--- a/src/byteset/mod.rs
+++ b/src/byteset/mod.rs
@@ -1,4 +1,5 @@
use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
mod scalar;
#[inline]
@@ -79,7 +80,7 @@ pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std", not(miri)))]
mod tests {
quickcheck::quickcheck! {
fn qc_byteset_forward_matches_naive(
diff --git a/src/byteset/scalar.rs b/src/byteset/scalar.rs
index 9bd34a8..28bff67 100644
--- a/src/byteset/scalar.rs
+++ b/src/byteset/scalar.rs
@@ -1,9 +1,8 @@
// This is adapted from `fallback.rs` from rust-memchr. It's modified to return
-// the 'inverse' query of memchr, e.g. finding the first byte not in the provided
-// set. This is simple for the 1-byte case.
+// the 'inverse' query of memchr, e.g. finding the first byte not in the
+// provided set. This is simple for the 1-byte case.
-use core::cmp;
-use core::usize;
+use core::{cmp, usize};
#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;
@@ -29,10 +28,11 @@ pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
let loop_size = cmp::min(LOOP_SIZE, haystack.len());
let align = USIZE_BYTES - 1;
let start_ptr = haystack.as_ptr();
- let end_ptr = haystack[haystack.len()..].as_ptr();
- let mut ptr = start_ptr;
unsafe {
+ let end_ptr = haystack.as_ptr().add(haystack.len());
+ let mut ptr = start_ptr;
+
if haystack.len() < USIZE_BYTES {
return forward_search(start_ptr, end_ptr, ptr, confirm);
}
@@ -68,10 +68,11 @@ pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
let loop_size = cmp::min(LOOP_SIZE, haystack.len());
let align = USIZE_BYTES - 1;
let start_ptr = haystack.as_ptr();
- let end_ptr = haystack[haystack.len()..].as_ptr();
- let mut ptr = end_ptr;
unsafe {
+ let end_ptr = haystack.as_ptr().add(haystack.len());
+ let mut ptr = end_ptr;
+
if haystack.len() < USIZE_BYTES {
return reverse_search(start_ptr, end_ptr, ptr, confirm);
}
@@ -81,7 +82,7 @@ pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
return reverse_search(start_ptr, end_ptr, ptr, confirm);
}
- ptr = (end_ptr as usize & !align) as *const u8;
+ ptr = ptr.sub(end_ptr as usize & align);
debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
@@ -174,9 +175,10 @@ pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>(
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
use super::{inv_memchr, inv_memrchr};
+
// search string, search byte, inv_memchr result, inv_memrchr result.
// these are expanded into a much larger set of tests in build_tests
const TESTS: &[(&[u8], u8, usize, usize)] = &[
@@ -192,10 +194,15 @@ mod tests {
type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);
fn build_tests() -> Vec<TestCase> {
+ #[cfg(not(miri))]
+ const MAX_PER: usize = 515;
+ #[cfg(miri)]
+ const MAX_PER: usize = 10;
+
let mut result = vec![];
for &(search, byte, fwd_pos, rev_pos) in TESTS {
result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
- for i in 1..515 {
+ for i in 1..MAX_PER {
// add a bunch of copies of the search byte to the end.
let mut suffixed: Vec<u8> = search.into();
suffixed.extend(std::iter::repeat(byte).take(i));
@@ -225,7 +232,7 @@ mod tests {
}
// build non-matching tests for several sizes
- for i in 0..515 {
+ for i in 0..MAX_PER {
result.push((
std::iter::repeat(b'\0').take(i).collect(),
b'\0',
@@ -239,6 +246,12 @@ mod tests {
#[test]
fn test_inv_memchr() {
use crate::{ByteSlice, B};
+
+ #[cfg(not(miri))]
+ const MAX_OFFSET: usize = 130;
+ #[cfg(miri)]
+ const MAX_OFFSET: usize = 13;
+
for (search, byte, matching) in build_tests() {
assert_eq!(
inv_memchr(byte, &search),
@@ -256,13 +269,14 @@ mod tests {
// better printing
B(&search).as_bstr(),
);
- // Test a rather large number off offsets for potential alignment issues
- for offset in 1..130 {
+            // Test a rather large number of offsets for potential alignment
+ // issues.
+ for offset in 1..MAX_OFFSET {
if offset >= search.len() {
break;
}
- // If this would cause us to shift the results off the end, skip
- // it so that we don't have to recompute them.
+ // If this would cause us to shift the results off the end,
+ // skip it so that we don't have to recompute them.
if let Some((f, r)) = matching {
if offset > f || offset > r {
break;
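
The reverse-search change above swaps an integer-to-pointer cast for plain pointer arithmetic. Both forms round the end pointer down to a word boundary; the new one keeps the result derived from the original pointer, which is what Miri's provenance checking expects. A small worked example of the equivalence (the address is hypothetical, not taken from the crate):

```
// Both expressions round an address down to a USIZE_BYTES boundary.
fn main() {
    const USIZE_BYTES: usize = core::mem::size_of::<usize>();
    const ALIGN: usize = USIZE_BYTES - 1;

    let end_addr: usize = 0x1003; // hypothetical end-of-haystack address
    let old = end_addr & !ALIGN; // old: `(end_ptr as usize & !align) as *const u8`
    let new = end_addr - (end_addr & ALIGN); // new: `ptr.sub(end_ptr as usize & align)`
    assert_eq!(old, new);
    assert_eq!(new % USIZE_BYTES, 0);
    println!("0x{:x} rounds down to 0x{:x}", end_addr, new);
}
```
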
diff --git a/src/ext_slice.rs b/src/ext_slice.rs
index 0cc73af..ec52a61 100644
--- a/src/ext_slice.rs
+++ b/src/ext_slice.rs
@@ -1,17 +1,16 @@
+use core::{iter, slice, str};
+
+#[cfg(all(feature = "alloc", feature = "unicode"))]
+use alloc::vec;
+#[cfg(feature = "alloc")]
+use alloc::{borrow::Cow, string::String, vec::Vec};
+
#[cfg(feature = "std")]
-use std::borrow::Cow;
-#[cfg(feature = "std")]
-use std::ffi::OsStr;
-#[cfg(feature = "std")]
-use std::path::Path;
+use std::{ffi::OsStr, path::Path};
-use core::{iter, ops, ptr, slice, str};
use memchr::{memchr, memmem, memrchr};
-use crate::ascii;
-use crate::bstr::BStr;
-use crate::byteset;
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
use crate::ext_vec::ByteVec;
#[cfg(feature = "unicode")]
use crate::unicode::{
@@ -19,7 +18,12 @@ use crate::unicode::{
SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
WordsWithBreaks,
};
-use crate::utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error};
+use crate::{
+ ascii,
+ bstr::BStr,
+ byteset,
+ utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
+};
/// A short-hand constructor for building a `&[u8]`.
///
@@ -149,11 +153,12 @@ pub trait ByteSlice: Sealed {
/// Create an immutable byte string from an OS string slice.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns `None` if the given OS string is not valid UTF-8. (For
- /// example, on Windows, file paths are allowed to be a sequence of
- /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
- /// valid UTF-8.)
+ /// When the underlying bytes of OS strings are accessible, then this
+ /// always succeeds and is zero cost. Otherwise, this returns `None` if the
+ /// given OS string is not valid UTF-8. (For example, when the underlying
+ /// bytes are inaccessible on Windows, file paths are allowed to be a
+ /// sequence of arbitrary 16-bit integers. Not all such sequences can be
+ /// transcoded to valid UTF-8.)
///
/// # Examples
///
@@ -190,10 +195,12 @@ pub trait ByteSlice: Sealed {
/// Create an immutable byte string from a file path.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns `None` if the given path is not valid UTF-8. (For example,
- /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
- /// integers. Not all such sequences can be transcoded to valid UTF-8.)
+ /// When the underlying bytes of paths are accessible, then this always
+ /// succeeds and is zero cost. Otherwise, this returns `None` if the given
+ /// path is not valid UTF-8. (For example, when the underlying bytes are
+ /// inaccessible on Windows, file paths are allowed to be a sequence of
+ /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
+ /// valid UTF-8.)
///
/// # Examples
///
@@ -230,6 +237,7 @@ pub trait ByteSlice: Sealed {
/// Basic usage:
///
/// ```
+ /// # #[cfg(feature = "alloc")] {
/// use bstr::{B, ByteSlice, ByteVec};
///
/// # fn example() -> Result<(), bstr::Utf8Error> {
@@ -241,6 +249,7 @@ pub trait ByteSlice: Sealed {
/// let err = bstring.to_str().unwrap_err();
/// assert_eq!(8, err.valid_up_to());
/// # Ok(()) }; example().unwrap()
+ /// # }
/// ```
#[inline]
fn to_str(&self) -> Result<&str, Utf8Error> {
@@ -301,7 +310,7 @@ pub trait ByteSlice: Sealed {
/// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
/// For a more precise description of the maximal subpart strategy, see
/// the Unicode Standard, Chapter 3, Section 9. See also
- /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
+ /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
///
/// N.B. Rust's standard library also appears to use the same strategy,
/// but it does not appear to be an API guarantee.
@@ -341,7 +350,7 @@ pub trait ByteSlice: Sealed {
/// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
/// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy(&self) -> Cow<'_, str> {
match utf8::validate(self.as_bytes()) {
@@ -398,7 +407,7 @@ pub trait ByteSlice: Sealed {
/// bstring.to_str_lossy_into(&mut dest);
/// assert_eq!("☃βツ\u{FFFD}", dest);
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy_into(&self, dest: &mut String) {
let mut bytes = self.as_bytes();
@@ -428,12 +437,15 @@ pub trait ByteSlice: Sealed {
/// Create an OS string slice from this byte string.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns a UTF-8 decoding error if this byte string is not valid
- /// UTF-8. (For example, on Windows, file paths are allowed to be a
- /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
- /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
- /// 16-bit integers.)
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
+ /// decoding error if this byte string is not valid UTF-8. (For example,
+ /// assuming the representation of `OsStr` is opaque on Windows, file paths
+ /// are allowed to be a sequence of arbitrary 16-bit integers. There is
+ /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
+ /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
+    /// is ever opened up, then this will convert any sequence of bytes to an
+ /// `OsStr` without cost.)
///
/// # Examples
///
@@ -467,13 +479,13 @@ pub trait ByteSlice: Sealed {
/// Lossily create an OS string slice from this byte string.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this will perform a UTF-8 check and lossily convert this byte string
- /// into valid UTF-8 using the Unicode replacement codepoint.
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// is zero cost and always returns a slice. Otherwise, this will perform a
+ /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
+ /// the Unicode replacement codepoint.
///
- /// Note that this can prevent the correct roundtripping of file paths on
- /// non-Unix systems such as Windows, where file paths are an arbitrary
- /// sequence of 16-bit integers.
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `OsStr` is opaque.
///
/// # Examples
///
@@ -512,12 +524,15 @@ pub trait ByteSlice: Sealed {
/// Create a path slice from this byte string.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns a UTF-8 decoding error if this byte string is not valid
- /// UTF-8. (For example, on Windows, file paths are allowed to be a
- /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
- /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
- /// 16-bit integers.)
+ /// When paths can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
+ /// decoding error if this byte string is not valid UTF-8. (For example,
+ /// assuming the representation of `Path` is opaque on Windows, file paths
+ /// are allowed to be a sequence of arbitrary 16-bit integers. There is
+ /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
+ /// arbitrary sequence of 16-bit integers. If the representation of `Path`
+    /// is ever opened up, then this will convert any sequence of bytes to a
+    /// `Path` without cost.)
///
/// # Examples
///
@@ -537,13 +552,13 @@ pub trait ByteSlice: Sealed {
/// Lossily create a path slice from this byte string.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this will perform a UTF-8 check and lossily convert this byte string
- /// into valid UTF-8 using the Unicode replacement codepoint.
+ /// When paths can be constructed from arbitrary byte sequences, this is
+ /// zero cost and always returns a slice. Otherwise, this will perform a
+ /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
+ /// the Unicode replacement codepoint.
///
- /// Note that this can prevent the correct roundtripping of file paths on
- /// non-Unix systems such as Windows, where file paths are an arbitrary
- /// sequence of 16-bit integers.
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `Path` is opaque.
///
/// # Examples
///
@@ -584,15 +599,10 @@ pub trait ByteSlice: Sealed {
/// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
/// assert_eq!(b"foo".repeatn(0), B(""));
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn repeatn(&self, n: usize) -> Vec<u8> {
- let bs = self.as_bytes();
- let mut dst = vec![0; bs.len() * n];
- for i in 0..n {
- dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs);
- }
- dst
+ self.as_bytes().repeat(n)
}
/// Returns true if and only if this byte string contains the given needle.
@@ -759,10 +769,10 @@ pub trait ByteSlice: Sealed {
/// assert_eq!(matches, vec![0]);
/// ```
#[inline]
- fn find_iter<'a, B: ?Sized + AsRef<[u8]>>(
- &'a self,
- needle: &'a B,
- ) -> Find<'a> {
+ fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
+ needle: &'n B,
+ ) -> Find<'h, 'n> {
Find::new(self.as_bytes(), needle.as_ref())
}
@@ -804,10 +814,10 @@ pub trait ByteSlice: Sealed {
/// assert_eq!(matches, vec![0]);
/// ```
#[inline]
- fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>(
- &'a self,
- needle: &'a B,
- ) -> FindReverse<'a> {
+ fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
+ needle: &'n B,
+ ) -> FindReverse<'h, 'n> {
FindReverse::new(self.as_bytes(), needle.as_ref())
}
@@ -926,14 +936,17 @@ pub trait ByteSlice: Sealed {
/// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
/// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
/// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
+ /// // The empty byteset never matches.
+ /// assert_eq!(None, b"abc".find_byteset(b""));
+ /// assert_eq!(None, b"".find_byteset(b""));
/// ```
#[inline]
fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
byteset::find(self.as_bytes(), byteset.as_ref())
}
- /// Returns the index of the first occurrence of a byte that is not a member
- /// of the provided set.
+ /// Returns the index of the first occurrence of a byte that is not a
+ /// member of the provided set.
///
/// The `byteset` may be any type that can be cheaply converted into a
/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
@@ -963,6 +976,10 @@ pub trait ByteSlice: Sealed {
/// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
/// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
/// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
+ /// // The negation of the empty byteset matches everything.
+ /// assert_eq!(Some(0), b"abc".find_not_byteset(b""));
+ /// // But an empty string never contains anything.
+ /// assert_eq!(None, b"".find_not_byteset(b""));
/// ```
#[inline]
fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
@@ -1043,8 +1060,9 @@ pub trait ByteSlice: Sealed {
byteset::rfind_not(self.as_bytes(), byteset.as_ref())
}
- /// Returns an iterator over the fields in a byte string, separated by
- /// contiguous whitespace.
+ /// Returns an iterator over the fields in a byte string, separated
+ /// by contiguous whitespace (according to the Unicode property
+ /// `White_Space`).
///
/// # Example
///
@@ -1065,6 +1083,7 @@ pub trait ByteSlice: Sealed {
///
/// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
/// ```
+ #[cfg(feature = "unicode")]
#[inline]
fn fields(&self) -> Fields<'_> {
Fields::new(self.as_bytes())
@@ -1191,10 +1210,10 @@ pub trait ByteSlice: Sealed {
/// It does *not* give you `["a", "b", "c"]`. For that behavior, use
/// [`fields`](#method.fields) instead.
#[inline]
- fn split_str<'a, B: ?Sized + AsRef<[u8]>>(
- &'a self,
- splitter: &'a B,
- ) -> Split<'a> {
+ fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
+ splitter: &'s B,
+ ) -> Split<'h, 's> {
Split::new(self.as_bytes(), splitter.as_ref())
}
@@ -1285,13 +1304,101 @@ pub trait ByteSlice: Sealed {
///
/// It does *not* give you `["a", "b", "c"]`.
#[inline]
- fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>(
- &'a self,
- splitter: &'a B,
- ) -> SplitReverse<'a> {
+ fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
+ splitter: &'s B,
+ ) -> SplitReverse<'h, 's> {
SplitReverse::new(self.as_bytes(), splitter.as_ref())
}
+    /// Split this byte string at the first occurrence of `splitter`.
+ ///
+ /// If the `splitter` is found in the byte string, returns a tuple
+    /// containing the parts of the string before and after the first
+    /// occurrence of `splitter`, respectively. Otherwise, if there are no
+    /// occurrences of `splitter` in the byte string, returns `None`.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// If you need to split on the *last* instance of a delimiter instead, see
+    /// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(
+ /// B("foo,bar").split_once_str(","),
+ /// Some((B("foo"), B("bar"))),
+ /// );
+ /// assert_eq!(
+ /// B("foo,bar,baz").split_once_str(","),
+ /// Some((B("foo"), B("bar,baz"))),
+ /// );
+ /// assert_eq!(B("foo").split_once_str(","), None);
+ /// assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
+ /// assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
+ /// ```
+ #[inline]
+ fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ splitter: &B,
+ ) -> Option<(&'a [u8], &'a [u8])> {
+ let bytes = self.as_bytes();
+ let splitter = splitter.as_ref();
+ let start = Finder::new(splitter).find(bytes)?;
+ let end = start + splitter.len();
+ Some((&bytes[..start], &bytes[end..]))
+ }
+
+    /// Split this byte string at the last occurrence of `splitter`.
+ ///
+ /// If the `splitter` is found in the byte string, returns a tuple
+    /// containing the parts of the string before and after the last
+    /// occurrence of `splitter`, respectively. Otherwise, if there are no
+    /// occurrences of `splitter` in the byte string, returns `None`.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// If you need to split on the *first* instance of a delimiter instead, see
+ /// the [`ByteSlice::split_once_str`](#method.split_once_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(
+ /// B("foo,bar").rsplit_once_str(","),
+ /// Some((B("foo"), B("bar"))),
+ /// );
+ /// assert_eq!(
+ /// B("foo,bar,baz").rsplit_once_str(","),
+ /// Some((B("foo,bar"), B("baz"))),
+ /// );
+ /// assert_eq!(B("foo").rsplit_once_str(","), None);
+ /// assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
+ /// assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
+ /// ```
+ #[inline]
+ fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ splitter: &B,
+ ) -> Option<(&'a [u8], &'a [u8])> {
+ let bytes = self.as_bytes();
+ let splitter = splitter.as_ref();
+ let start = FinderReverse::new(splitter).rfind(bytes)?;
+ let end = start + splitter.len();
+ Some((&bytes[..start], &bytes[end..]))
+ }
+
/// Returns an iterator of at most `limit` substrings of this byte string,
/// separated by the given byte string. If `limit` substrings are yielded,
/// then the last substring will contain the remainder of this byte string.
@@ -1328,11 +1435,11 @@ pub trait ByteSlice: Sealed {
/// assert!(x.is_empty());
/// ```
#[inline]
- fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>(
- &'a self,
+ fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
limit: usize,
- splitter: &'a B,
- ) -> SplitN<'a> {
+ splitter: &'s B,
+ ) -> SplitN<'h, 's> {
SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
}
@@ -1374,11 +1481,11 @@ pub trait ByteSlice: Sealed {
/// assert!(x.is_empty());
/// ```
#[inline]
- fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>(
- &'a self,
+ fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
limit: usize,
- splitter: &'a B,
- ) -> SplitNReverse<'a> {
+ splitter: &'s B,
+ ) -> SplitNReverse<'h, 's> {
SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
}
@@ -1416,7 +1523,7 @@ pub trait ByteSlice: Sealed {
/// let s = b"foo".replace("", "Z");
/// assert_eq!(s, "ZfZoZoZ".as_bytes());
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
&self,
@@ -1462,7 +1569,7 @@ pub trait ByteSlice: Sealed {
/// let s = b"foo".replacen("", "Z", 2);
/// assert_eq!(s, "ZfZoo".as_bytes());
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
&self,
@@ -1520,7 +1627,7 @@ pub trait ByteSlice: Sealed {
/// s.replace_into("", "Z", &mut dest);
/// assert_eq!(dest, "ZfZoZoZ".as_bytes());
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
&self,
@@ -1584,7 +1691,7 @@ pub trait ByteSlice: Sealed {
/// s.replacen_into("", "Z", 2, &mut dest);
/// assert_eq!(dest, "ZfZoo".as_bytes());
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
&self,
@@ -1800,6 +1907,7 @@ pub trait ByteSlice: Sealed {
/// not necessarily correspond to the length of the `&str` returned!
///
/// ```
+ /// # #[cfg(all(feature = "alloc"))] {
/// use bstr::{ByteSlice, ByteVec};
///
/// let mut bytes = vec![];
@@ -1813,6 +1921,7 @@ pub trait ByteSlice: Sealed {
/// graphemes,
/// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
/// );
+ /// # }
/// ```
#[cfg(feature = "unicode")]
#[inline]
@@ -2277,7 +2386,7 @@ pub trait ByteSlice: Sealed {
/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
/// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
/// ```
- #[cfg(all(feature = "std", feature = "unicode"))]
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase(&self) -> Vec<u8> {
let mut buf = vec![];
@@ -2339,7 +2448,7 @@ pub trait ByteSlice: Sealed {
/// s.to_lowercase_into(&mut buf);
/// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
/// ```
- #[cfg(all(feature = "std", feature = "unicode"))]
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
// TODO: This is the best we can do given what std exposes I think.
@@ -2394,7 +2503,7 @@ pub trait ByteSlice: Sealed {
/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
/// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn to_ascii_lowercase(&self) -> Vec<u8> {
self.as_bytes().to_ascii_lowercase()
@@ -2424,11 +2533,13 @@ pub trait ByteSlice: Sealed {
/// Invalid UTF-8 remains as is:
///
/// ```
+ /// # #[cfg(feature = "alloc")] {
/// use bstr::{B, ByteSlice, ByteVec};
///
/// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
/// s.make_ascii_lowercase();
/// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
+ /// # }
/// ```
#[inline]
fn make_ascii_lowercase(&mut self) {
@@ -2480,7 +2591,7 @@ pub trait ByteSlice: Sealed {
/// let s = B(b"foo\xFFbar\xE2\x98baz");
/// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
/// ```
- #[cfg(all(feature = "std", feature = "unicode"))]
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase(&self) -> Vec<u8> {
let mut buf = vec![];
@@ -2542,7 +2653,7 @@ pub trait ByteSlice: Sealed {
/// s.to_uppercase_into(&mut buf);
/// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
/// ```
- #[cfg(all(feature = "std", feature = "unicode"))]
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
// TODO: This is the best we can do given what std exposes I think.
@@ -2594,7 +2705,7 @@ pub trait ByteSlice: Sealed {
/// let s = B(b"foo\xFFbar\xE2\x98baz");
/// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
/// ```
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
#[inline]
fn to_ascii_uppercase(&self) -> Vec<u8> {
self.as_bytes().to_ascii_uppercase()
@@ -2624,11 +2735,13 @@ pub trait ByteSlice: Sealed {
/// Invalid UTF-8 remains as is:
///
/// ```
+ /// # #[cfg(feature = "alloc")] {
/// use bstr::{B, ByteSlice, ByteVec};
///
/// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
/// s.make_ascii_uppercase();
/// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// # }
/// ```
#[inline]
fn make_ascii_uppercase(&mut self) {
@@ -2900,72 +3013,6 @@ pub trait ByteSlice: Sealed {
Some(index)
}
}
-
- /// Copies elements from one part of the slice to another part of itself,
- /// where the parts may be overlapping.
- ///
- /// `src` is the range within this byte string to copy from, while `dest`
- /// is the starting index of the range within this byte string to copy to.
- /// The length indicated by `src` must be less than or equal to the number
- /// of bytes from `dest` to the end of the byte string.
- ///
- /// # Panics
- ///
- /// Panics if either range is out of bounds, or if `src` is too big to fit
- /// into `dest`, or if the end of `src` is before the start.
- ///
- /// # Examples
- ///
- /// Copying four bytes within a byte string:
- ///
- /// ```
- /// use bstr::{B, ByteSlice};
- ///
- /// let mut buf = *b"Hello, World!";
- /// let s = &mut buf;
- /// s.copy_within_str(1..5, 8);
- /// assert_eq!(s, B("Hello, Wello!"));
- /// ```
- #[inline]
- fn copy_within_str<R>(&mut self, src: R, dest: usize)
- where
- R: ops::RangeBounds<usize>,
- {
- // TODO: Deprecate this once slice::copy_within stabilizes.
- let src_start = match src.start_bound() {
- ops::Bound::Included(&n) => n,
- ops::Bound::Excluded(&n) => {
- n.checked_add(1).expect("attempted to index slice beyond max")
- }
- ops::Bound::Unbounded => 0,
- };
- let src_end = match src.end_bound() {
- ops::Bound::Included(&n) => {
- n.checked_add(1).expect("attempted to index slice beyond max")
- }
- ops::Bound::Excluded(&n) => n,
- ops::Bound::Unbounded => self.as_bytes().len(),
- };
- assert!(src_start <= src_end, "src end is before src start");
- assert!(src_end <= self.as_bytes().len(), "src is out of bounds");
- let count = src_end - src_start;
- assert!(
- dest <= self.as_bytes().len() - count,
- "dest is out of bounds",
- );
-
- // SAFETY: This is safe because we use ptr::copy to handle overlapping
- // copies, and is also safe because we've checked all the bounds above.
- // Finally, we are only dealing with u8 data, which is Copy, which
- // means we can copy without worrying about ownership/destructors.
- unsafe {
- ptr::copy(
- self.as_bytes().get_unchecked(src_start),
- self.as_bytes_mut().get_unchecked_mut(dest),
- count,
- );
- }
- }
}
/// A single substring searcher fixed to a particular needle.
@@ -3138,22 +3185,22 @@ impl<'a> FinderReverse<'a> {
///
/// Matches are reported by the byte offset at which they begin.
///
-/// `'a` is the shorter of two lifetimes: the byte string being searched or the
-/// byte string being looked for.
+/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
+/// needle.
#[derive(Debug)]
-pub struct Find<'a> {
- it: memmem::FindIter<'a, 'a>,
- haystack: &'a [u8],
- needle: &'a [u8],
+pub struct Find<'h, 'n> {
+ it: memmem::FindIter<'h, 'n>,
+ haystack: &'h [u8],
+ needle: &'n [u8],
}
-impl<'a> Find<'a> {
- fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
+impl<'h, 'n> Find<'h, 'n> {
+ fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
Find { it: memmem::find_iter(haystack, needle), haystack, needle }
}
}
-impl<'a> Iterator for Find<'a> {
+impl<'h, 'n> Iterator for Find<'h, 'n> {
type Item = usize;
#[inline]
@@ -3166,17 +3213,17 @@ impl<'a> Iterator for Find<'a> {
///
/// Matches are reported by the byte offset at which they begin.
///
-/// `'a` is the shorter of two lifetimes: the byte string being searched or the
-/// byte string being looked for.
+/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
+/// needle.
#[derive(Debug)]
-pub struct FindReverse<'a> {
- it: memmem::FindRevIter<'a, 'a>,
- haystack: &'a [u8],
- needle: &'a [u8],
+pub struct FindReverse<'h, 'n> {
+ it: memmem::FindRevIter<'h, 'n>,
+ haystack: &'h [u8],
+ needle: &'n [u8],
}
-impl<'a> FindReverse<'a> {
- fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
+impl<'h, 'n> FindReverse<'h, 'n> {
+ fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
FindReverse {
it: memmem::rfind_iter(haystack, needle),
haystack,
@@ -3184,16 +3231,16 @@ impl<'a> FindReverse<'a> {
}
}
- fn haystack(&self) -> &'a [u8] {
+ fn haystack(&self) -> &'h [u8] {
self.haystack
}
- fn needle(&self) -> &[u8] {
+ fn needle(&self) -> &'n [u8] {
self.needle
}
}
-impl<'a> Iterator for FindReverse<'a> {
+impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
type Item = usize;
#[inline]
@@ -3215,7 +3262,7 @@ impl<'a> Bytes<'a> {
/// This has the same lifetime as the original slice,
/// and so the iterator can continue to be used while this exists.
#[inline]
- pub fn as_slice(&self) -> &'a [u8] {
+ pub fn as_bytes(&self) -> &'a [u8] {
self.it.as_slice()
}
}
@@ -3252,21 +3299,27 @@ impl<'a> iter::FusedIterator for Bytes<'a> {}
/// An iterator over the fields in a byte string, separated by whitespace.
///
+/// Whitespace for this iterator is defined by the Unicode property
+/// `White_Space`.
+///
/// This iterator splits on contiguous runs of whitespace, such that the fields
/// in `foo\t\t\n \nbar` are `foo` and `bar`.
///
/// `'a` is the lifetime of the byte string being split.
+#[cfg(feature = "unicode")]
#[derive(Debug)]
pub struct Fields<'a> {
it: FieldsWith<'a, fn(char) -> bool>,
}
+#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
fn new(bytes: &'a [u8]) -> Fields<'a> {
Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
}
}
+#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
type Item = &'a [u8];
@@ -3328,10 +3381,11 @@ impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
/// An iterator over substrings in a byte string, split by a separator.
///
-/// `'a` is the lifetime of the byte string being split.
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
#[derive(Debug)]
-pub struct Split<'a> {
- finder: Find<'a>,
+pub struct Split<'h, 's> {
+ finder: Find<'h, 's>,
/// The end position of the previous match of our splitter. The element
/// we yield corresponds to the substring starting at `last` up to the
/// beginning of the next match of the splitter.
@@ -3342,18 +3396,18 @@ pub struct Split<'a> {
done: bool,
}
-impl<'a> Split<'a> {
- fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> {
+impl<'h, 's> Split<'h, 's> {
+ fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
let finder = haystack.find_iter(splitter);
Split { finder, last: 0, done: false }
}
}
-impl<'a> Iterator for Split<'a> {
- type Item = &'a [u8];
+impl<'h, 's> Iterator for Split<'h, 's> {
+ type Item = &'h [u8];
#[inline]
- fn next(&mut self) -> Option<&'a [u8]> {
+ fn next(&mut self) -> Option<&'h [u8]> {
let haystack = self.finder.haystack;
match self.finder.next() {
Some(start) => {
@@ -3383,11 +3437,11 @@ impl<'a> Iterator for Split<'a> {
/// An iterator over substrings in a byte string, split by a separator, in
/// reverse.
///
-/// `'a` is the lifetime of the byte string being split, while `F` is the type
-/// of the predicate, i.e., `FnMut(char) -> bool`.
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
#[derive(Debug)]
-pub struct SplitReverse<'a> {
- finder: FindReverse<'a>,
+pub struct SplitReverse<'h, 's> {
+ finder: FindReverse<'h, 's>,
/// The end position of the previous match of our splitter. The element
/// we yield corresponds to the substring starting at `last` up to the
/// beginning of the next match of the splitter.
@@ -3398,18 +3452,18 @@ pub struct SplitReverse<'a> {
done: bool,
}
-impl<'a> SplitReverse<'a> {
- fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> {
+impl<'h, 's> SplitReverse<'h, 's> {
+ fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
let finder = haystack.rfind_iter(splitter);
SplitReverse { finder, last: haystack.len(), done: false }
}
}
-impl<'a> Iterator for SplitReverse<'a> {
- type Item = &'a [u8];
+impl<'h, 's> Iterator for SplitReverse<'h, 's> {
+ type Item = &'h [u8];
#[inline]
- fn next(&mut self) -> Option<&'a [u8]> {
+ fn next(&mut self) -> Option<&'h [u8]> {
let haystack = self.finder.haystack();
match self.finder.next() {
Some(start) => {
@@ -3440,31 +3494,31 @@ impl<'a> Iterator for SplitReverse<'a> {
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator.
///
-/// `'a` is the lifetime of the byte string being split, while `F` is the type
-/// of the predicate, i.e., `FnMut(char) -> bool`.
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
#[derive(Debug)]
-pub struct SplitN<'a> {
- split: Split<'a>,
+pub struct SplitN<'h, 's> {
+ split: Split<'h, 's>,
limit: usize,
count: usize,
}
-impl<'a> SplitN<'a> {
+impl<'h, 's> SplitN<'h, 's> {
fn new(
- haystack: &'a [u8],
- splitter: &'a [u8],
+ haystack: &'h [u8],
+ splitter: &'s [u8],
limit: usize,
- ) -> SplitN<'a> {
+ ) -> SplitN<'h, 's> {
let split = haystack.split_str(splitter);
SplitN { split, limit, count: 0 }
}
}
-impl<'a> Iterator for SplitN<'a> {
- type Item = &'a [u8];
+impl<'h, 's> Iterator for SplitN<'h, 's> {
+ type Item = &'h [u8];
#[inline]
- fn next(&mut self) -> Option<&'a [u8]> {
+ fn next(&mut self) -> Option<&'h [u8]> {
self.count += 1;
if self.count > self.limit || self.split.done {
None
@@ -3479,31 +3533,31 @@ impl<'a> Iterator for SplitN<'a> {
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator, in reverse.
///
-/// `'a` is the lifetime of the byte string being split, while `F` is the type
-/// of the predicate, i.e., `FnMut(char) -> bool`.
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
#[derive(Debug)]
-pub struct SplitNReverse<'a> {
- split: SplitReverse<'a>,
+pub struct SplitNReverse<'h, 's> {
+ split: SplitReverse<'h, 's>,
limit: usize,
count: usize,
}
-impl<'a> SplitNReverse<'a> {
+impl<'h, 's> SplitNReverse<'h, 's> {
fn new(
- haystack: &'a [u8],
- splitter: &'a [u8],
+ haystack: &'h [u8],
+ splitter: &'s [u8],
limit: usize,
- ) -> SplitNReverse<'a> {
+ ) -> SplitNReverse<'h, 's> {
let split = haystack.rsplit_str(splitter);
SplitNReverse { split, limit, count: 0 }
}
}
-impl<'a> Iterator for SplitNReverse<'a> {
- type Item = &'a [u8];
+impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
+ type Item = &'h [u8];
#[inline]
- fn next(&mut self) -> Option<&'a [u8]> {
+ fn next(&mut self) -> Option<&'h [u8]> {
self.count += 1;
if self.count > self.limit || self.split.done {
None
@@ -3521,6 +3575,7 @@ impl<'a> Iterator for SplitNReverse<'a> {
/// `\n`.
///
/// `'a` is the lifetime of the byte string being iterated over.
+#[derive(Clone, Debug)]
pub struct Lines<'a> {
it: LinesWithTerminator<'a>,
}
@@ -3529,6 +3584,28 @@ impl<'a> Lines<'a> {
fn new(bytes: &'a [u8]) -> Lines<'a> {
Lines { it: LinesWithTerminator::new(bytes) }
}
+
+ /// Return a copy of the rest of the underlying bytes without affecting the
+ /// iterator itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ /// bar\r
+ /// baz";
+ /// let mut lines = s.lines();
+ /// assert_eq!(lines.next(), Some(B("foo")));
+ /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
+ /// ```
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.it.bytes
+ }
}
impl<'a> Iterator for Lines<'a> {
@@ -3536,17 +3613,19 @@ impl<'a> Iterator for Lines<'a> {
#[inline]
fn next(&mut self) -> Option<&'a [u8]> {
- let mut line = self.it.next()?;
- if line.last_byte() == Some(b'\n') {
- line = &line[..line.len() - 1];
- if line.last_byte() == Some(b'\r') {
- line = &line[..line.len() - 1];
- }
- }
- Some(line)
+ Some(trim_last_terminator(self.it.next()?))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Lines<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ Some(trim_last_terminator(self.it.next_back()?))
}
}
+impl<'a> iter::FusedIterator for Lines<'a> {}
+
/// An iterator over all lines in a byte string, including their terminators.
///
/// For this iterator, the only line terminator recognized is `\n`. (Since
@@ -3560,6 +3639,7 @@ impl<'a> Iterator for Lines<'a> {
/// the original byte string.
///
/// `'a` is the lifetime of the byte string being iterated over.
+#[derive(Clone, Debug)]
pub struct LinesWithTerminator<'a> {
bytes: &'a [u8],
}
@@ -3568,6 +3648,28 @@ impl<'a> LinesWithTerminator<'a> {
fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
LinesWithTerminator { bytes }
}
+
+ /// Return a copy of the rest of the underlying bytes without affecting the
+ /// iterator itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ /// bar\r
+ /// baz";
+ /// let mut lines = s.lines_with_terminator();
+ /// assert_eq!(lines.next(), Some(B("foo\n")));
+ /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
+ /// ```
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bytes
+ }
}
impl<'a> Iterator for LinesWithTerminator<'a> {
@@ -3591,10 +3693,43 @@ impl<'a> Iterator for LinesWithTerminator<'a> {
}
}
-#[cfg(test)]
+impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ let end = self.bytes.len().checked_sub(1)?;
+ match self.bytes[..end].rfind_byte(b'\n') {
+ None => {
+ let line = self.bytes;
+ self.bytes = b"";
+ Some(line)
+ }
+ Some(end) => {
+ let line = &self.bytes[end + 1..];
+ self.bytes = &self.bytes[..end + 1];
+ Some(line)
+ }
+ }
+ }
+}
+
+impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
+
+fn trim_last_terminator(mut s: &[u8]) -> &[u8] {
+ if s.last_byte() == Some(b'\n') {
+ s = &s[..s.len() - 1];
+ if s.last_byte() == Some(b'\r') {
+ s = &s[..s.len() - 1];
+ }
+ }
+ s
+}
+
+#[cfg(all(test, feature = "std"))]
mod tests {
- use crate::ext_slice::{ByteSlice, B};
- use crate::tests::LOSSY_TESTS;
+ use crate::{
+ ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
+ tests::LOSSY_TESTS,
+ };
#[test]
fn to_str_lossy() {
@@ -3622,34 +3757,55 @@ mod tests {
}
#[test]
- #[should_panic]
- fn copy_within_fail1() {
- let mut buf = *b"foobar";
- let s = &mut buf;
- s.copy_within_str(0..2, 5);
- }
+ fn lines_iteration() {
+ macro_rules! t {
+ ($it:expr, $forward:expr) => {
+ let mut res: Vec<&[u8]> = Vec::from($forward);
+ assert_eq!($it.collect::<Vec<_>>(), res);
+ res.reverse();
+ assert_eq!($it.rev().collect::<Vec<_>>(), res);
+ };
+ }
- #[test]
- #[should_panic]
- fn copy_within_fail2() {
- let mut buf = *b"foobar";
- let s = &mut buf;
- s.copy_within_str(3..2, 0);
- }
+ t!(Lines::new(b""), []);
+ t!(LinesWithTerminator::new(b""), []);
- #[test]
- #[should_panic]
- fn copy_within_fail3() {
- let mut buf = *b"foobar";
- let s = &mut buf;
- s.copy_within_str(5..7, 0);
- }
+ t!(Lines::new(b"\n"), [B("")]);
+ t!(Lines::new(b"\r\n"), [B("")]);
+ t!(LinesWithTerminator::new(b"\n"), [B("\n")]);
- #[test]
- #[should_panic]
- fn copy_within_fail4() {
- let mut buf = *b"foobar";
- let s = &mut buf;
- s.copy_within_str(0..1, 6);
+ t!(Lines::new(b"a"), [B("a")]);
+ t!(LinesWithTerminator::new(b"a"), [B("a")]);
+
+ t!(Lines::new(b"abc"), [B("abc")]);
+ t!(LinesWithTerminator::new(b"abc"), [B("abc")]);
+
+ t!(Lines::new(b"abc\n"), [B("abc")]);
+ t!(Lines::new(b"abc\r\n"), [B("abc")]);
+ t!(LinesWithTerminator::new(b"abc\n"), [B("abc\n")]);
+
+ t!(Lines::new(b"abc\n\n"), [B("abc"), B("")]);
+ t!(LinesWithTerminator::new(b"abc\n\n"), [B("abc\n"), B("\n")]);
+
+ t!(Lines::new(b"abc\n\ndef"), [B("abc"), B(""), B("def")]);
+ t!(
+ LinesWithTerminator::new(b"abc\n\ndef"),
+ [B("abc\n"), B("\n"), B("def")]
+ );
+
+ t!(Lines::new(b"abc\n\ndef\n"), [B("abc"), B(""), B("def")]);
+ t!(
+ LinesWithTerminator::new(b"abc\n\ndef\n"),
+ [B("abc\n"), B("\n"), B("def\n")]
+ );
+
+ t!(Lines::new(b"\na\nb\n"), [B(""), B("a"), B("b")]);
+ t!(
+ LinesWithTerminator::new(b"\na\nb\n"),
+ [B("\n"), B("a\n"), B("b\n")]
+ );
+
+ t!(Lines::new(b"\n\n\n"), [B(""), B(""), B("")]);
+ t!(LinesWithTerminator::new(b"\n\n\n"), [B("\n"), B("\n"), B("\n")]);
}
}
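
One theme of the `ext_slice.rs` changes is splitting the old unified `'a` lifetime into `'h` (haystack) and `'n`/`'s` (needle or splitter). A small usage sketch, not taken from the patch, of what that buys: the subslices yielded by `split_str` now borrow only the haystack, so they may outlive a temporary splitter, which would not compile under the old unified lifetime:

```
use bstr::ByteSlice;

// Returns subslices of `haystack` even though the splitter is local: the
// items of `Split<'h, 's>` carry only the haystack lifetime `'h`.
fn fields_of(haystack: &[u8]) -> Vec<&[u8]> {
    let splitter = String::from(","); // short-lived splitter
    haystack.split_str(&splitter).collect()
}

fn main() {
    assert_eq!(fields_of(b"a,b,c"), vec![&b"a"[..], &b"b"[..], &b"c"[..]]);
}
```
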
diff --git a/src/ext_vec.rs b/src/ext_vec.rs
index 5beb0e1..5effdd0 100644
--- a/src/ext_vec.rs
+++ b/src/ext_vec.rs
@@ -1,16 +1,21 @@
-use std::borrow::Cow;
-use std::error;
-use std::ffi::{OsStr, OsString};
-use std::fmt;
-use std::iter;
-use std::ops;
-use std::path::{Path, PathBuf};
-use std::ptr;
-use std::str;
-use std::vec;
-
-use crate::ext_slice::ByteSlice;
-use crate::utf8::{self, Utf8Error};
+use core::fmt;
+use core::iter;
+use core::ops;
+use core::ptr;
+
+use alloc::{borrow::Cow, string::String, vec, vec::Vec};
+
+#[cfg(feature = "std")]
+use std::{
+ error,
+ ffi::{OsStr, OsString},
+ path::{Path, PathBuf},
+};
+
+use crate::{
+ ext_slice::ByteSlice,
+ utf8::{self, Utf8Error},
+};
/// Concatenate the elements given by the iterator together into a single
/// `Vec<u8>`.
@@ -154,8 +159,9 @@ pub trait ByteVec: Sealed {
/// Create a new byte string from an owned OS string.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns the original OS string if it is not valid UTF-8.
+ /// When the underlying bytes of OS strings are accessible, then this
+ /// always succeeds and is zero cost. Otherwise, this returns the given
+ /// `OsString` if it is not valid UTF-8.
///
/// # Examples
///
@@ -171,6 +177,7 @@ pub trait ByteVec: Sealed {
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
+ #[cfg(feature = "std")]
fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
#[cfg(unix)]
#[inline]
@@ -191,10 +198,11 @@ pub trait ByteVec: Sealed {
/// Lossily create a new byte string from an OS string slice.
///
- /// On Unix, this always succeeds, is zero cost and always returns a slice.
- /// On non-Unix systems, this does a UTF-8 check. If the given OS string
- /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8
- /// (with invalid bytes replaced by the Unicode replacement codepoint).
+ /// When the underlying bytes of OS strings are accessible, then this is
+ /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
+ /// performed and if the given OS string is not valid UTF-8, then it is
+ /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
+ /// Unicode replacement codepoint).
///
/// # Examples
///
@@ -210,6 +218,7 @@ pub trait ByteVec: Sealed {
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
+ #[cfg(feature = "std")]
fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
#[cfg(unix)]
#[inline]
@@ -233,8 +242,9 @@ pub trait ByteVec: Sealed {
/// Create a new byte string from an owned file path.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns the original path if it is not valid UTF-8.
+ /// When the underlying bytes of paths are accessible, then this always
+ /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
+ /// if it is not valid UTF-8.
///
/// # Examples
///
@@ -250,16 +260,18 @@ pub trait ByteVec: Sealed {
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
+ #[cfg(feature = "std")]
fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
}
/// Lossily create a new byte string from a file path.
///
- /// On Unix, this always succeeds, is zero cost and always returns a slice.
- /// On non-Unix systems, this does a UTF-8 check. If the given path is not
- /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid
- /// bytes replaced by the Unicode replacement codepoint).
+ /// When the underlying bytes of paths are accessible, then this is
+ /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
+ /// performed and if the given path is not valid UTF-8, then it is lossily
+ /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
+ /// replacement codepoint).
///
/// # Examples
///
@@ -275,6 +287,7 @@ pub trait ByteVec: Sealed {
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
+ #[cfg(feature = "std")]
fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
Vec::from_os_str_lossy(path.as_os_str())
}
@@ -363,12 +376,10 @@ pub trait ByteVec: Sealed {
/// ```
/// use bstr::ByteVec;
///
- /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let bytes = Vec::from("hello");
- /// let string = bytes.into_string()?;
+ /// let string = bytes.into_string().unwrap();
///
/// assert_eq!("hello", string);
- /// # Ok(()) }; example().unwrap()
/// ```
///
/// If this byte string is not valid UTF-8, then an error will be returned.
@@ -469,8 +480,9 @@ pub trait ByteVec: Sealed {
/// Converts this byte string into an OS string, in place.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns the original byte string if it is not valid UTF-8.
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, if this byte string is not
+ /// valid UTF-8, then an error (with the original byte string) is returned.
///
/// # Examples
///
@@ -485,14 +497,15 @@ pub trait ByteVec: Sealed {
/// let os_str = bs.into_os_string().expect("should be valid UTF-8");
/// assert_eq!(os_str, OsStr::new("foo"));
/// ```
+ #[cfg(feature = "std")]
#[inline]
- fn into_os_string(self) -> Result<OsString, Vec<u8>>
+ fn into_os_string(self) -> Result<OsString, FromUtf8Error>
where
Self: Sized,
{
#[cfg(unix)]
#[inline]
- fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
+ fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
use std::os::unix::ffi::OsStringExt;
Ok(OsString::from_vec(v))
@@ -500,11 +513,8 @@ pub trait ByteVec: Sealed {
#[cfg(not(unix))]
#[inline]
- fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
- match v.into_string() {
- Ok(s) => Ok(OsString::from(s)),
- Err(err) => Err(err.into_vec()),
- }
+ fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
+ v.into_string().map(OsString::from)
}
imp(self.into_vec())
@@ -512,13 +522,13 @@ pub trait ByteVec: Sealed {
/// Lossily converts this byte string into an OS string, in place.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this will perform a UTF-8 check and lossily convert this byte string
- /// into valid UTF-8 using the Unicode replacement codepoint.
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// is zero cost and always returns a slice. Otherwise, this will perform a
+ /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
+ /// the Unicode replacement codepoint.
///
- /// Note that this can prevent the correct roundtripping of file paths on
- /// non-Unix systems such as Windows, where file paths are an arbitrary
- /// sequence of 16-bit integers.
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `OsString` is opaque.
///
/// # Examples
///
@@ -532,6 +542,7 @@ pub trait ByteVec: Sealed {
/// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
/// ```
#[inline]
+ #[cfg(feature = "std")]
fn into_os_string_lossy(self) -> OsString
where
Self: Sized,
@@ -555,8 +566,9 @@ pub trait ByteVec: Sealed {
/// Converts this byte string into an owned file path, in place.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this returns the original byte string if it is not valid UTF-8.
+ /// When paths can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, if this byte string is not
+ /// valid UTF-8, then an error (with the original byte string) is returned.
///
/// # Examples
///
@@ -569,8 +581,9 @@ pub trait ByteVec: Sealed {
/// let path = bs.into_path_buf().expect("should be valid UTF-8");
/// assert_eq!(path.as_os_str(), "foo");
/// ```
+ #[cfg(feature = "std")]
#[inline]
- fn into_path_buf(self) -> Result<PathBuf, Vec<u8>>
+ fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
where
Self: Sized,
{
@@ -579,13 +592,13 @@ pub trait ByteVec: Sealed {
/// Lossily converts this byte string into an owned file path, in place.
///
- /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
- /// this will perform a UTF-8 check and lossily convert this byte string
- /// into valid UTF-8 using the Unicode replacement codepoint.
+ /// When paths can be constructed from arbitrary byte sequences, this is
+ /// zero cost and always succeeds. Otherwise, this will perform a
+ /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
+ /// the Unicode replacement codepoint.
///
- /// Note that this can prevent the correct roundtripping of file paths on
- /// non-Unix systems such as Windows, where file paths are an arbitrary
- /// sequence of 16-bit integers.
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `PathBuf` is opaque.
///
/// # Examples
///
@@ -599,6 +612,7 @@ pub trait ByteVec: Sealed {
/// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
/// ```
#[inline]
+ #[cfg(feature = "std")]
fn into_path_buf_lossy(self) -> PathBuf
where
Self: Sized,
@@ -1029,6 +1043,7 @@ impl FromUtf8Error {
}
}
+#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
#[inline]
fn description(&self) -> &str {
@@ -1043,7 +1058,7 @@ impl fmt::Display for FromUtf8Error {
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
use crate::ext_vec::ByteVec;
diff --git a/src/impls.rs b/src/impls.rs
index 85a27ba..669aee6 100644
--- a/src/impls.rs
+++ b/src/impls.rs
@@ -18,7 +18,7 @@ macro_rules! impl_partial_eq {
};
}
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
macro_rules! impl_partial_eq_cow {
($lhs:ty, $rhs:ty) => {
impl<'a, 'b> PartialEq<$rhs> for $lhs {
@@ -59,17 +59,22 @@ macro_rules! impl_partial_ord {
};
}
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
mod bstring {
- use std::borrow::{Borrow, Cow, ToOwned};
- use std::cmp::Ordering;
- use std::fmt;
- use std::iter::FromIterator;
- use std::ops;
+ use core::{
+ cmp::Ordering, convert::TryFrom, fmt, iter::FromIterator, ops,
+ };
- use crate::bstr::BStr;
- use crate::bstring::BString;
- use crate::ext_vec::ByteVec;
+ use alloc::{
+ borrow::{Borrow, Cow, ToOwned},
+ string::String,
+ vec,
+ vec::Vec,
+ };
+
+ use crate::{
+ bstr::BStr, bstring::BString, ext_slice::ByteSlice, ext_vec::ByteVec,
+ };
impl fmt::Display for BString {
#[inline]
@@ -90,21 +95,21 @@ mod bstring {
#[inline]
fn deref(&self) -> &Vec<u8> {
- &self.bytes
+ self.as_vec()
}
}
impl ops::DerefMut for BString {
#[inline]
fn deref_mut(&mut self) -> &mut Vec<u8> {
- &mut self.bytes
+ self.as_vec_mut()
}
}
impl AsRef<[u8]> for BString {
#[inline]
fn as_ref(&self) -> &[u8] {
- &self.bytes
+ self.as_bytes()
}
}
@@ -118,7 +123,7 @@ mod bstring {
impl AsMut<[u8]> for BString {
#[inline]
fn as_mut(&mut self) -> &mut [u8] {
- &mut self.bytes
+ self.as_bytes_mut()
}
}
@@ -161,14 +166,14 @@ mod bstring {
impl From<Vec<u8>> for BString {
#[inline]
fn from(s: Vec<u8>) -> BString {
- BString { bytes: s }
+ BString::new(s)
}
}
impl From<BString> for Vec<u8> {
#[inline]
fn from(s: BString) -> Vec<u8> {
- s.bytes
+ s.into_vec()
}
}
@@ -200,6 +205,24 @@ mod bstring {
}
}
+ impl TryFrom<BString> for String {
+ type Error = crate::FromUtf8Error;
+
+ #[inline]
+ fn try_from(s: BString) -> Result<String, crate::FromUtf8Error> {
+ s.into_vec().into_string()
+ }
+ }
+
+ impl<'a> TryFrom<&'a BString> for &'a str {
+ type Error = crate::Utf8Error;
+
+ #[inline]
+ fn try_from(s: &'a BString) -> Result<&'a str, crate::Utf8Error> {
+ s.as_bytes().to_str()
+ }
+ }
+
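A minimal sketch of how the new fallible `BString` conversions might be used, assuming the `alloc` feature is enabled:

```
use core::convert::TryFrom;

use bstr::BString;

let bs = BString::from("foo");
// The borrowing conversion requires valid UTF-8 but does not consume `bs`.
let s = <&str>::try_from(&bs).expect("valid UTF-8");
assert_eq!(s, "foo");

// The owned conversion consumes the BString; invalid UTF-8 yields an error
// that still owns the original bytes.
assert!(String::try_from(bs).is_ok());
assert!(String::try_from(BString::from(&b"\xFFoo"[..])).is_err());
```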
impl FromIterator<char> for BString {
#[inline]
fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> BString {
@@ -279,7 +302,7 @@ mod bstring {
impl PartialOrd for BString {
#[inline]
fn partial_cmp(&self, other: &BString) -> Option<Ordering> {
- PartialOrd::partial_cmp(&self.bytes, &other.bytes)
+ PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes())
}
}
@@ -301,15 +324,12 @@ mod bstring {
}
mod bstr {
- #[cfg(feature = "std")]
- use std::borrow::Cow;
+ use core::{cmp::Ordering, convert::TryFrom, fmt, ops};
- use core::cmp::Ordering;
- use core::fmt;
- use core::ops;
+ #[cfg(feature = "alloc")]
+ use alloc::{borrow::Cow, boxed::Box, string::String, vec::Vec};
- use crate::bstr::BStr;
- use crate::ext_slice::ByteSlice;
+ use crate::{bstr::BStr, ext_slice::ByteSlice};
impl fmt::Display for BStr {
#[inline]
@@ -590,6 +610,13 @@ mod bstr {
}
}
+ impl<'a> From<&'a BStr> for &'a [u8] {
+ #[inline]
+ fn from(s: &'a BStr) -> &'a [u8] {
+ BStr::as_bytes(s)
+ }
+ }
+
impl<'a> From<&'a str> for &'a BStr {
#[inline]
fn from(s: &'a str) -> &'a BStr {
@@ -597,7 +624,7 @@ mod bstr {
}
}
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl<'a> From<&'a BStr> for Cow<'a, BStr> {
#[inline]
fn from(s: &'a BStr) -> Cow<'a, BStr> {
@@ -605,7 +632,7 @@ mod bstr {
}
}
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl From<Box<[u8]>> for Box<BStr> {
#[inline]
fn from(s: Box<[u8]>) -> Box<BStr> {
@@ -613,7 +640,7 @@ mod bstr {
}
}
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl From<Box<BStr>> for Box<[u8]> {
#[inline]
fn from(s: Box<BStr>) -> Box<[u8]> {
@@ -621,6 +648,25 @@ mod bstr {
}
}
+ impl<'a> TryFrom<&'a BStr> for &'a str {
+ type Error = crate::Utf8Error;
+
+ #[inline]
+ fn try_from(s: &'a BStr) -> Result<&'a str, crate::Utf8Error> {
+ s.as_bytes().to_str()
+ }
+ }
+
+ #[cfg(feature = "alloc")]
+ impl<'a> TryFrom<&'a BStr> for String {
+ type Error = crate::Utf8Error;
+
+ #[inline]
+ fn try_from(s: &'a BStr) -> Result<String, crate::Utf8Error> {
+ Ok(s.as_bytes().to_str()?.into())
+ }
+ }
+
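Likewise, a sketch of the new `&BStr` conversions; the final line assumes the `alloc` feature:

```
use core::convert::TryFrom;

use bstr::{BStr, ByteSlice};

let bs: &BStr = b"foo".as_bstr();

// Infallible: &BStr -> &[u8].
let bytes = <&[u8]>::from(bs);
assert_eq!(bytes, b"foo");

// Fallible: the UTF-8 conversions mirror `to_str`.
assert_eq!(<&str>::try_from(bs).unwrap(), "foo");
assert!(<&str>::try_from(b"\xFFoo".as_bstr()).is_err());
assert_eq!(String::try_from(bs).unwrap(), "foo");
```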
impl Eq for BStr {}
impl PartialEq<BStr> for BStr {
@@ -635,19 +681,19 @@ mod bstr {
impl_partial_eq!(BStr, str);
impl_partial_eq!(BStr, &'a str);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq!(BStr, Vec<u8>);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq!(&'a BStr, Vec<u8>);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq!(BStr, String);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq!(&'a BStr, String);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq_cow!(&'a BStr, Cow<'a, BStr>);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq_cow!(&'a BStr, Cow<'a, str>);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_eq_cow!(&'a BStr, Cow<'a, [u8]>);
impl PartialOrd for BStr {
@@ -669,17 +715,17 @@ mod bstr {
impl_partial_ord!(BStr, str);
impl_partial_ord!(BStr, &'a str);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_ord!(BStr, Vec<u8>);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_ord!(&'a BStr, Vec<u8>);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_ord!(BStr, String);
- #[cfg(feature = "std")]
+ #[cfg(feature = "alloc")]
impl_partial_ord!(&'a BStr, String);
}
-#[cfg(feature = "serde1-nostd")]
+#[cfg(feature = "serde")]
mod bstr_serde {
use core::fmt;
@@ -737,10 +783,11 @@ mod bstr_serde {
}
}
-#[cfg(feature = "serde1")]
+#[cfg(all(feature = "serde", feature = "alloc"))]
mod bstring_serde {
- use std::cmp;
- use std::fmt;
+ use core::{cmp, fmt};
+
+ use alloc::{string::String, vec::Vec};
use serde::{
de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer,
@@ -825,8 +872,9 @@ mod bstring_serde {
}
}
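As an illustrative sketch only: exercising the renamed `serde` feature, assuming `serde` and `alloc` are enabled and using `serde_json` purely as an example consumer (it is not a dependency of this crate):

```
use bstr::BString;

let bs = BString::from("foo");
// BString serializes as a byte sequence, which serde_json renders as an
// array of numbers rather than a string.
let json = serde_json::to_string(&bs).unwrap();
assert_eq!(json, "[102,111,111]");

// Deserialization accepts the same representation.
let back: BString = serde_json::from_str(&json).unwrap();
assert_eq!(back, bs);
```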
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod display {
+ #[cfg(not(miri))]
use crate::bstring::BString;
use crate::ByteSlice;
@@ -926,6 +974,7 @@ mod display {
);
}
+ #[cfg(not(miri))]
quickcheck::quickcheck! {
fn total_length(bstr: BString) -> bool {
let size = bstr.chars().count();
@@ -934,7 +983,7 @@ mod display {
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "alloc"))]
mod bstring_arbitrary {
use crate::bstring::BString;
@@ -946,12 +995,13 @@ mod bstring_arbitrary {
}
fn shrink(&self) -> Box<dyn Iterator<Item = BString>> {
- Box::new(self.bytes.shrink().map(BString::from))
+ Box::new(self.as_vec().shrink().map(BString::from))
}
}
}
#[test]
+#[cfg(feature = "std")]
fn test_debug() {
use crate::{ByteSlice, B};
@@ -973,10 +1023,12 @@ fn test_debug() {
// See: https://github.com/BurntSushi/bstr/issues/82
#[test]
+#[cfg(feature = "std")]
fn test_cows_regression() {
- use crate::ByteSlice;
use std::borrow::Cow;
+ use crate::ByteSlice;
+
let c1 = Cow::from(b"hello bstr".as_bstr());
let c2 = b"goodbye bstr".as_bstr();
assert_ne!(c1, c2);
diff --git a/src/io.rs b/src/io.rs
index ad6f3c1..1386bf3 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -7,10 +7,11 @@ facilities for conveniently and efficiently working with lines as byte strings.
More APIs may be added in the future.
*/
+use alloc::{vec, vec::Vec};
+
use std::io;
-use crate::ext_slice::ByteSlice;
-use crate::ext_vec::ByteVec;
+use crate::{ext_slice::ByteSlice, ext_vec::ByteVec};
/// An extension trait for
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
@@ -36,7 +37,7 @@ pub trait BufReadExt: io::BufRead {
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ /// let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
///
/// let mut lines = vec![];
/// for result in cursor.byte_lines() {
@@ -79,7 +80,7 @@ pub trait BufReadExt: io::BufRead {
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ /// let mut cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
///
/// let mut records = vec![];
/// for result in cursor.byte_records(b'\x00') {
@@ -122,7 +123,7 @@ pub trait BufReadExt: io::BufRead {
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ /// let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
///
/// let mut lines = vec![];
/// cursor.for_byte_line(|line| {
@@ -135,7 +136,7 @@ pub trait BufReadExt: io::BufRead {
/// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
- fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
+ fn for_byte_line<F>(&mut self, mut for_each_line: F) -> io::Result<()>
where
Self: Sized,
F: FnMut(&[u8]) -> io::Result<bool>,
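Because the receiver is now `&mut self` rather than `self`, the reader is no longer consumed by these methods; a minimal sketch (assuming the default `std` feature):

```
use std::io;

use bstr::io::BufReadExt;

let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");

let mut count = 0;
cursor
    .for_byte_line(|_line| {
        count += 1;
        Ok(true)
    })
    .unwrap();
assert_eq!(count, 3);

// The reader was only mutably borrowed, so it can be rewound and reused.
cursor.set_position(0);
let lines: Vec<Vec<u8>> =
    cursor.byte_lines().collect::<Result<_, _>>().unwrap();
assert_eq!(lines.len(), 3);
```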
@@ -169,7 +170,7 @@ pub trait BufReadExt: io::BufRead {
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ /// let mut cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
///
/// let mut records = vec![];
/// cursor.for_byte_record(b'\x00', |record| {
@@ -183,7 +184,7 @@ pub trait BufReadExt: io::BufRead {
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_record<F>(
- self,
+ &mut self,
terminator: u8,
mut for_each_record: F,
) -> io::Result<()>
@@ -223,7 +224,7 @@ pub trait BufReadExt: io::BufRead {
/// use bstr::io::BufReadExt;
///
/// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ /// let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
///
/// let mut lines = vec![];
/// cursor.for_byte_line_with_terminator(|line| {
@@ -237,7 +238,7 @@ pub trait BufReadExt: io::BufRead {
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_line_with_terminator<F>(
- self,
+ &mut self,
for_each_line: F,
) -> io::Result<()>
where
@@ -269,11 +270,10 @@ pub trait BufReadExt: io::BufRead {
/// ```
/// use std::io;
///
- /// use bstr::B;
- /// use bstr::io::BufReadExt;
+ /// use bstr::{io::BufReadExt, B};
///
/// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ /// let mut cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
///
/// let mut records = vec![];
/// cursor.for_byte_record_with_terminator(b'\x00', |record| {
@@ -287,7 +287,7 @@ pub trait BufReadExt: io::BufRead {
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_record_with_terminator<F>(
- mut self,
+ &mut self,
terminator: u8,
mut for_each_record: F,
) -> io::Result<()>
@@ -438,11 +438,12 @@ fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] {
record
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
- use super::BufReadExt;
use crate::bstring::BString;
+ use super::BufReadExt;
+
fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
let mut lines = vec![];
slice
diff --git a/src/lib.rs b/src/lib.rs
index 41142c9..09e17b0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -52,23 +52,27 @@ Here's another example showing how to do a search and replace (and also showing
use of the `B` function):
```
+# #[cfg(feature = "alloc")] {
use bstr::{B, ByteSlice};
let old = B("foo ☃☃☃ foo foo quux foo");
let new = old.replace("foo", "hello");
assert_eq!(new, B("hello ☃☃☃ hello hello quux hello"));
+# }
```
And here's an example that shows case conversion, even in the presence of
invalid UTF-8:
```
+# #[cfg(all(feature = "alloc", feature = "unicode"))] {
use bstr::{ByteSlice, ByteVec};
let mut lower = Vec::from("hello β");
lower[0] = b'\xFF';
// lowercase β is uppercased to Β
assert_eq!(lower.to_uppercase(), b"\xFFELLO \xCE\x92");
+# }
```
# Convenient debug representation
@@ -98,10 +102,8 @@ method converts any `&[u8]` to a `&BStr`.
# When should I use byte strings?
-This library reflects my hypothesis that UTF-8 by convention is a better trade
-off in some circumstances than guaranteed UTF-8. It's possible, perhaps even
-likely, that this is a niche concern for folks working closely with core text
-primitives.
+This library reflects my belief that UTF-8 by convention is a better trade
+off in some circumstances than guaranteed UTF-8.
The first time this idea hit me was in the implementation of Rust's regex
engine. In particular, very little of the internal implementation cares at all
@@ -134,24 +136,26 @@ incremental way by only parsing chunks at a time, but this is often complex to
do or impractical. For example, many regex engines only accept one contiguous
sequence of bytes at a time with no way to perform incremental matching.
-In summary, conventional UTF-8 byte strings provided by this library are
-definitely useful in some limited circumstances, but how useful they are more
-broadly isn't clear yet.
-
# `bstr` in public APIs
-Since this library is not yet `1.0`, you should not use it in the public API of
-your crates until it hits `1.0` (unless you're OK with with tracking breaking
-releases of `bstr`). It is expected that `bstr 1.0` will be released before
-2022.
+This library is past version `1` and is expected to remain at version `1` for
+the foreseeable future. Therefore, it is encouraged to put types from `bstr`
+(like `BStr` and `BString`) in your public API if that makes sense for your
+crate.
+
+With that said, in general, it should be possible to avoid putting anything
+in this crate into your public APIs. Namely, you should never need to use the
+`ByteSlice` or `ByteVec` traits as bounds on public APIs, since their only
+purpose is to extend the methods on the concrete types `[u8]` and `Vec<u8>`,
+respectively. Similarly, it should not be necessary to put either the `BStr` or
+`BString` types into public APIs. If you want to use them internally, then they
+can be converted to/from `[u8]`/`Vec<u8>` as needed. The conversions are free.
+
+So while it shouldn't ever be 100% necessary to make `bstr` a public
+dependency, there may be cases where it is convenient to do so. This is an
+explicitly supported use case of `bstr`, and as such, major version releases
+should be exceptionally rare.
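For instance, a minimal sketch of the zero-cost conversions mentioned above (assuming default features):

```
use bstr::{BStr, BString, ByteSlice};

// Borrowed: &[u8] <-> &BStr is free in both directions.
let bytes: &[u8] = b"foo bar";
let bstr: &BStr = bytes.as_bstr();
let roundtrip: &[u8] = bstr.as_bytes();
assert_eq!(bytes, roundtrip);

// Owned: Vec<u8> <-> BString is also free in both directions.
let owned: BString = Vec::from("foo bar").into();
let vec: Vec<u8> = owned.into();
assert_eq!(vec, b"foo bar");
```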
-In general, it should be possible to avoid putting anything in this crate into
-your public APIs. Namely, you should never need to use the `ByteSlice` or
-`ByteVec` traits as bounds on public APIs, since their only purpose is to
-extend the methods on the concrete types `[u8]` and `Vec<u8>`, respectively.
-Similarly, it should not be necessary to put either the `BStr` or `BString`
-types into public APIs. If you want to use them internally, then they can
-be converted to/from `[u8]`/`Vec<u8>` as needed.
# Differences with standard strings
@@ -318,7 +322,8 @@ they can do:
by accessing their underlying 16-bit integer representation. Unfortunately,
this isn't zero cost (it introduces a second WTF-8 decoding step) and it's
not clear this is a good thing to do, since WTF-8 should ideally remain an
- internal implementation detail.
+ internal implementation detail. This is roughly the approach taken by the
+ [`os_str_bytes`](https://crates.io/crates/os_str_bytes) crate.
2. One could instead declare that they will not handle paths on Windows that
are not valid UTF-16, and return an error when one is encountered.
3. Like (2), but instead of returning an error, lossily decode the file path
@@ -365,19 +370,57 @@ UTF-8, and thus contain latent bugs on Unix where paths with invalid UTF-8 are
not terribly uncommon. If you instead use byte strings, then you're guaranteed
to write correct code for Unix, at the cost of getting a corner case wrong on
Windows.
+
+# Cargo features
+
+This crate comes with a few features that control standard library, serde
+and Unicode support.
+
+* `std` - **Enabled** by default. This provides APIs that require the standard
+  library, such as `OsString` and `PathBuf`. Enabling this feature also enables
+ the `alloc` feature and any other relevant `std` features for dependencies.
+* `alloc` - **Enabled** by default. This provides APIs that require allocations
+ via the `alloc` crate, such as `Vec<u8>`.
+* `unicode` - **Enabled** by default. This provides APIs that require sizable
+ Unicode data compiled into the binary. This includes, but is not limited to,
+ grapheme/word/sentence segmenters. When this is disabled, basic support such
+ as UTF-8 decoding is still included. Note that currently, enabling this
+ feature also requires enabling the `std` feature. It is expected that this
+ limitation will be lifted at some point.
+* `serde` - Enables implementations of serde traits for `BStr`, and also
+ `BString` when `alloc` is enabled.
*/
-#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(not(any(feature = "std", test)), no_std)]
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
+// Why do we do this? Well, in order for us to use once_cell's 'Lazy' type to
+// load DFAs, it requires enabling its 'std' feature. Yet, there is really
+// nothing about our 'unicode' feature that requires 'std'. We could declare
+// that 'unicode = [std, ...]', which would be fine, but once regex-automata
+// 0.3 is a thing, I believe we can drop once_cell altogether and thus drop
+// the need for 'std' to be enabled when 'unicode' is enabled. But if we make
+// 'unicode' also enable 'std', then it would be a breaking change to remove
+// 'std' from that list.
+//
+// So, for right now, we force folks to explicitly say they want 'std' if they
+// want 'unicode'. In the future, we should be able to relax this.
+#[cfg(all(feature = "unicode", not(feature = "std")))]
+compile_error!("enabling 'unicode' requires enabling 'std'");
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
pub use crate::bstr::BStr;
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
pub use crate::bstring::BString;
+#[cfg(feature = "unicode")]
+pub use crate::ext_slice::Fields;
pub use crate::ext_slice::{
- ByteSlice, Bytes, Fields, FieldsWith, Find, FindReverse, Finder,
- FinderReverse, Lines, LinesWithTerminator, Split, SplitN, SplitNReverse,
- SplitReverse, B,
+ ByteSlice, Bytes, FieldsWith, Find, FindReverse, Finder, FinderReverse,
+ Lines, LinesWithTerminator, Split, SplitN, SplitNReverse, SplitReverse, B,
};
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
pub use crate::ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error};
#[cfg(feature = "unicode")]
pub use crate::unicode::{
@@ -391,26 +434,28 @@ pub use crate::utf8::{
mod ascii;
mod bstr;
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
mod bstring;
mod byteset;
mod ext_slice;
-#[cfg(feature = "std")]
+#[cfg(feature = "alloc")]
mod ext_vec;
mod impls;
#[cfg(feature = "std")]
pub mod io;
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests;
#[cfg(feature = "unicode")]
mod unicode;
mod utf8;
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod apitests {
- use crate::bstr::BStr;
- use crate::bstring::BString;
- use crate::ext_slice::{Finder, FinderReverse};
+ use crate::{
+ bstr::BStr,
+ bstring::BString,
+ ext_slice::{Finder, FinderReverse},
+ };
#[test]
fn oibits() {
diff --git a/src/tests.rs b/src/tests.rs
index f4179fd..03a4461 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -6,7 +6,7 @@
///
/// The first element in each tuple is the expected result of lossy decoding,
/// while the second element is the input given.
-pub const LOSSY_TESTS: &[(&str, &[u8])] = &[
+pub(crate) const LOSSY_TESTS: &[(&str, &[u8])] = &[
("a", b"a"),
("\u{FFFD}", b"\xFF"),
("\u{FFFD}\u{FFFD}", b"\xFF\xFF"),
diff --git a/src/unicode/data/GraphemeBreakTest.txt b/src/unicode/data/GraphemeBreakTest.txt
index fb4fec9..eff2fd3 100644
--- a/src/unicode/data/GraphemeBreakTest.txt
+++ b/src/unicode/data/GraphemeBreakTest.txt
@@ -1,6 +1,6 @@
-# GraphemeBreakTest-12.1.0.txt
-# Date: 2019-03-10, 10:53:12 GMT
-# © 2019 Unicode®, Inc.
+# GraphemeBreakTest-14.0.0.txt
+# Date: 2021-03-08, 06:22:32 GMT
+# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
diff --git a/src/unicode/data/SentenceBreakTest.txt b/src/unicode/data/SentenceBreakTest.txt
index 7c1c34a..61ea42c 100644
--- a/src/unicode/data/SentenceBreakTest.txt
+++ b/src/unicode/data/SentenceBreakTest.txt
@@ -1,6 +1,6 @@
-# SentenceBreakTest-12.1.0.txt
-# Date: 2019-03-10, 10:53:28 GMT
-# © 2019 Unicode®, Inc.
+# SentenceBreakTest-14.0.0.txt
+# Date: 2021-03-08, 06:22:40 GMT
+# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
diff --git a/src/unicode/data/WordBreakTest.txt b/src/unicode/data/WordBreakTest.txt
index facd892..1d1435b 100644
--- a/src/unicode/data/WordBreakTest.txt
+++ b/src/unicode/data/WordBreakTest.txt
@@ -1,6 +1,6 @@
-# WordBreakTest-12.1.0.txt
-# Date: 2019-03-10, 10:53:29 GMT
-# © 2019 Unicode®, Inc.
+# WordBreakTest-14.0.0.txt
+# Date: 2021-03-08, 06:22:40 GMT
+# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
diff --git a/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
index 0efaaf2..31f99c1 100644
--- a/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
+++ b/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
index eb24025..3a51728 100644
--- a/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
+++ b/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_fwd.rs b/src/unicode/fsm/grapheme_break_fwd.rs
index b53b1d7..dea4a7e 100644
--- a/src/unicode/fsm/grapheme_break_fwd.rs
+++ b/src/unicode/fsm/grapheme_break_fwd.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
index d42cd36..742d2a6 100644
--- a/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
+++ b/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
index c75ea5f..d1937f2 100644
--- a/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
+++ b/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_rev.rs b/src/unicode/fsm/grapheme_break_rev.rs
index 93e888c..2d2cd54 100644
--- a/src/unicode/fsm/grapheme_break_rev.rs
+++ b/src/unicode/fsm/grapheme_break_rev.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/regional_indicator_rev.rs b/src/unicode/fsm/regional_indicator_rev.rs
index 2bf7e4c..db7a40f 100644
--- a/src/unicode/fsm/regional_indicator_rev.rs
+++ b/src/unicode/fsm/regional_indicator_rev.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
index a1813d7..1abdae8 100644
--- a/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
+++ b/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
index 2763583..2f8aadd 100644
--- a/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
+++ b/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/sentence_break_fwd.rs b/src/unicode/fsm/sentence_break_fwd.rs
index cc937a4..97dd658 100644
--- a/src/unicode/fsm/sentence_break_fwd.rs
+++ b/src/unicode/fsm/sentence_break_fwd.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/src/unicode/fsm/simple_word_fwd.bigendian.dfa
index adc64c1..888e465 100644
--- a/src/unicode/fsm/simple_word_fwd.bigendian.dfa
+++ b/src/unicode/fsm/simple_word_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/src/unicode/fsm/simple_word_fwd.littleendian.dfa
index dd48386..a1d527c 100644
--- a/src/unicode/fsm/simple_word_fwd.littleendian.dfa
+++ b/src/unicode/fsm/simple_word_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/simple_word_fwd.rs b/src/unicode/fsm/simple_word_fwd.rs
index f1f3da5..32b69b6 100644
--- a/src/unicode/fsm/simple_word_fwd.rs
+++ b/src/unicode/fsm/simple_word_fwd.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/whitespace_anchored_fwd.rs b/src/unicode/fsm/whitespace_anchored_fwd.rs
index 419b5d4..0780412 100644
--- a/src/unicode/fsm/whitespace_anchored_fwd.rs
+++ b/src/unicode/fsm/whitespace_anchored_fwd.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/whitespace_anchored_rev.rs b/src/unicode/fsm/whitespace_anchored_rev.rs
index 301b03c..3d0d7a6 100644
--- a/src/unicode/fsm/whitespace_anchored_rev.rs
+++ b/src/unicode/fsm/whitespace_anchored_rev.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u16], u16> = {
+pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u16], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u16; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u16], u16> = {
+pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u16], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u16; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/fsm/word_break_fwd.bigendian.dfa b/src/unicode/fsm/word_break_fwd.bigendian.dfa
index 1e75db6..efb9c81 100644
--- a/src/unicode/fsm/word_break_fwd.bigendian.dfa
+++ b/src/unicode/fsm/word_break_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/word_break_fwd.littleendian.dfa b/src/unicode/fsm/word_break_fwd.littleendian.dfa
index e3093a3..9a716d0 100644
--- a/src/unicode/fsm/word_break_fwd.littleendian.dfa
+++ b/src/unicode/fsm/word_break_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/word_break_fwd.rs b/src/unicode/fsm/word_break_fwd.rs
index fb041b7..dcb5f6b 100644
--- a/src/unicode/fsm/word_break_fwd.rs
+++ b/src/unicode/fsm/word_break_fwd.rs
@@ -2,11 +2,12 @@
//
// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
//
-// ucd-generate 0.2.9 is available on crates.io.
+// ucd-generate 0.2.12 is available on crates.io.
#[cfg(target_endian = "big")]
-lazy_static::lazy_static! {
- pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -18,15 +19,13 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("word_break_fwd.bigendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
#[cfg(target_endian = "little")]
-lazy_static::lazy_static! {
- pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| {
#[repr(C)]
struct Aligned<B: ?Sized> {
_align: [u8; 0],
@@ -38,8 +37,5 @@ lazy_static::lazy_static! {
bytes: *include_bytes!("word_break_fwd.littleendian.dfa"),
};
- unsafe {
- ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes)
- }
- };
-}
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/src/unicode/grapheme.rs b/src/unicode/grapheme.rs
index ad31cf1..13b730c 100644
--- a/src/unicode/grapheme.rs
+++ b/src/unicode/grapheme.rs
@@ -1,10 +1,14 @@
use regex_automata::DFA;
-use crate::ext_slice::ByteSlice;
-use crate::unicode::fsm::grapheme_break_fwd::GRAPHEME_BREAK_FWD;
-use crate::unicode::fsm::grapheme_break_rev::GRAPHEME_BREAK_REV;
-use crate::unicode::fsm::regional_indicator_rev::REGIONAL_INDICATOR_REV;
-use crate::utf8;
+use crate::{
+ ext_slice::ByteSlice,
+ unicode::fsm::{
+ grapheme_break_fwd::GRAPHEME_BREAK_FWD,
+ grapheme_break_rev::GRAPHEME_BREAK_REV,
+ regional_indicator_rev::REGIONAL_INDICATOR_REV,
+ },
+ utf8,
+};
/// An iterator over grapheme clusters in a byte string.
///
@@ -125,7 +129,7 @@ pub struct GraphemeIndices<'a> {
impl<'a> GraphemeIndices<'a> {
pub(crate) fn new(bs: &'a [u8]) -> GraphemeIndices<'a> {
- GraphemeIndices { bs: bs, forward_index: 0, reverse_index: bs.len() }
+ GraphemeIndices { bs, forward_index: 0, reverse_index: bs.len() }
}
/// View the underlying data as a subslice of the original data.
@@ -191,6 +195,22 @@ impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
+ } else if bs.len() >= 2
+ && bs[0].is_ascii()
+ && bs[1].is_ascii()
+ && !bs[0].is_ascii_whitespace()
+ {
+ // FIXME: It is somewhat sad that we have to special case this, but it
+ // leads to a significant speed up in predominantly ASCII text. The
+ // issue here is that the DFA has a bit of overhead, and running it for
+ // every byte in mostly ASCII text results in a bit of a slowdown. We should
+ // re-litigate this once regex-automata 0.3 is out, but it might be
+ // hard to avoid the special case. A DFA is always going to at least
+ // require some memory access.
+
+ // Safe because all ASCII bytes are valid UTF-8.
+ let grapheme = unsafe { bs[..1].to_str_unchecked() };
+ (grapheme, 1)
} else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) {
// Safe because a match can only occur for valid UTF-8.
let grapheme = unsafe { bs[..end].to_str_unchecked() };
@@ -257,15 +277,17 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
+ #[cfg(not(miri))]
use ucd_parse::GraphemeClusterBreakTest;
+ use crate::{ext_slice::ByteSlice, tests::LOSSY_TESTS};
+
use super::*;
- use crate::ext_slice::ByteSlice;
- use crate::tests::LOSSY_TESTS;
#[test]
+ #[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.grapheme_clusters.concat();
@@ -288,6 +310,7 @@ mod tests {
}
#[test]
+ #[cfg(not(miri))]
fn reverse_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.grapheme_clusters.concat();
@@ -329,15 +352,18 @@ mod tests {
}
}
+ #[cfg(not(miri))]
fn uniescape(s: &str) -> String {
s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
}
+ #[cfg(not(miri))]
fn uniescape_vec(strs: &[String]) -> Vec<String> {
strs.iter().map(|s| uniescape(s)).collect()
}
/// Return all of the UCD for grapheme breaks.
+ #[cfg(not(miri))]
fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
const TESTDATA: &'static str =
include_str!("data/GraphemeBreakTest.txt");
diff --git a/src/unicode/mod.rs b/src/unicode/mod.rs
index 60318f4..80638e8 100644
--- a/src/unicode/mod.rs
+++ b/src/unicode/mod.rs
@@ -1,8 +1,8 @@
-pub use self::grapheme::{decode_grapheme, GraphemeIndices, Graphemes};
-pub use self::sentence::{SentenceIndices, Sentences};
-pub use self::whitespace::{whitespace_len_fwd, whitespace_len_rev};
-pub use self::word::{
- WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks,
+pub use self::{
+ grapheme::{decode_grapheme, GraphemeIndices, Graphemes},
+ sentence::{SentenceIndices, Sentences},
+ whitespace::{whitespace_len_fwd, whitespace_len_rev},
+ word::{WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks},
};
mod fsm;
diff --git a/src/unicode/sentence.rs b/src/unicode/sentence.rs
index 063f342..ff29c7e 100644
--- a/src/unicode/sentence.rs
+++ b/src/unicode/sentence.rs
@@ -1,8 +1,9 @@
use regex_automata::DFA;
-use crate::ext_slice::ByteSlice;
-use crate::unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD;
-use crate::utf8;
+use crate::{
+ ext_slice::ByteSlice,
+ unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD, utf8,
+};
/// An iterator over sentences in a byte string.
///
@@ -97,7 +98,7 @@ pub struct SentenceIndices<'a> {
impl<'a> SentenceIndices<'a> {
pub(crate) fn new(bs: &'a [u8]) -> SentenceIndices<'a> {
- SentenceIndices { bs: bs, forward_index: 0 }
+ SentenceIndices { bs, forward_index: 0 }
}
/// View the underlying data as a subslice of the original data.
@@ -156,13 +157,15 @@ fn decode_sentence(bs: &[u8]) -> (&str, usize) {
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
+ #[cfg(not(miri))]
use ucd_parse::SentenceBreakTest;
use crate::ext_slice::ByteSlice;
#[test]
+ #[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.sentences.concat();
@@ -198,11 +201,13 @@ mod tests {
bytes.sentences().collect()
}
+ #[cfg(not(miri))]
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
}
/// Return all of the UCD for sentence breaks.
+ #[cfg(not(miri))]
fn ucdtests() -> Vec<SentenceBreakTest> {
const TESTDATA: &'static str =
include_str!("data/SentenceBreakTest.txt");
diff --git a/src/unicode/whitespace.rs b/src/unicode/whitespace.rs
index 949a83f..b5eff30 100644
--- a/src/unicode/whitespace.rs
+++ b/src/unicode/whitespace.rs
@@ -1,7 +1,9 @@
use regex_automata::DFA;
-use crate::unicode::fsm::whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD;
-use crate::unicode::fsm::whitespace_anchored_rev::WHITESPACE_ANCHORED_REV;
+use crate::unicode::fsm::{
+ whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD,
+ whitespace_anchored_rev::WHITESPACE_ANCHORED_REV,
+};
/// Return the first position of a non-whitespace character.
pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
diff --git a/src/unicode/word.rs b/src/unicode/word.rs
index e0a5701..849f0c8 100644
--- a/src/unicode/word.rs
+++ b/src/unicode/word.rs
@@ -1,9 +1,12 @@
use regex_automata::DFA;
-use crate::ext_slice::ByteSlice;
-use crate::unicode::fsm::simple_word_fwd::SIMPLE_WORD_FWD;
-use crate::unicode::fsm::word_break_fwd::WORD_BREAK_FWD;
-use crate::utf8;
+use crate::{
+ ext_slice::ByteSlice,
+ unicode::fsm::{
+ simple_word_fwd::SIMPLE_WORD_FWD, word_break_fwd::WORD_BREAK_FWD,
+ },
+ utf8,
+};
/// An iterator over words in a byte string.
///
@@ -254,7 +257,7 @@ pub struct WordsWithBreakIndices<'a> {
impl<'a> WordsWithBreakIndices<'a> {
pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreakIndices<'a> {
- WordsWithBreakIndices { bs: bs, forward_index: 0 }
+ WordsWithBreakIndices { bs, forward_index: 0 }
}
/// View the underlying data as a subslice of the original data.
@@ -316,13 +319,15 @@ fn decode_word(bs: &[u8]) -> (&str, usize) {
}
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
+ #[cfg(not(miri))]
use ucd_parse::WordBreakTest;
use crate::ext_slice::ByteSlice;
#[test]
+ #[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.words.concat();
@@ -379,17 +384,26 @@ mod tests {
assert_eq!(vec!["1XY"], words(b"1XY"));
assert_eq!(vec!["\u{FEFF}", "Ты"], words("\u{FEFF}Ты".as_bytes()));
+
+ // Tests that Vithkuqi works, which was introduced in Unicode 14.
+ // This test fails prior to Unicode 14.
+ assert_eq!(
+ vec!["\u{10570}\u{10597}"],
+ words("\u{10570}\u{10597}".as_bytes())
+ );
}
fn words(bytes: &[u8]) -> Vec<&str> {
bytes.words_with_breaks().collect()
}
+ #[cfg(not(miri))]
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
}
/// Return all of the UCD for word breaks.
+ #[cfg(not(miri))]
fn ucdtests() -> Vec<WordBreakTest> {
const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");
diff --git a/src/utf8.rs b/src/utf8.rs
index 5c7de36..bc9bc52 100644
--- a/src/utf8.rs
+++ b/src/utf8.rs
@@ -1,13 +1,9 @@
-use core::char;
-use core::cmp;
-use core::fmt;
-use core::str;
+use core::{char, cmp, fmt, str};
+
#[cfg(feature = "std")]
use std::error;
-use crate::ascii;
-use crate::bstr::BStr;
-use crate::ext_slice::ByteSlice;
+use crate::{ascii, bstr::BStr, ext_slice::ByteSlice};
// The UTF-8 decoder provided here is based on the one presented here:
// https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
@@ -75,7 +71,7 @@ const STATES_FORWARD: &'static [u8] = &[
///
/// When invalid UTF-8 byte sequences are found, they are substituted with the
/// Unicode replacement codepoint (`U+FFFD`) using the
-/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html).
+/// ["maximal subpart" strategy](https://www.unicode.org/review/pr-121.html).
///
/// This iterator is created by the
/// [`chars`](trait.ByteSlice.html#method.chars) method provided by the
@@ -146,7 +142,7 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
///
/// When invalid UTF-8 byte sequences are found, they are substituted with the
/// Unicode replacement codepoint (`U+FFFD`) using the
-/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html).
+/// ["maximal subpart" strategy](https://www.unicode.org/review/pr-121.html).
///
/// Note that this is slightly different from the `CharIndices` iterator
/// provided by the standard library. Aside from working on possibly invalid
@@ -168,7 +164,7 @@ pub struct CharIndices<'a> {
impl<'a> CharIndices<'a> {
pub(crate) fn new(bs: &'a [u8]) -> CharIndices<'a> {
- CharIndices { bs: bs, forward_index: 0, reverse_index: bs.len() }
+ CharIndices { bs, forward_index: 0, reverse_index: bs.len() }
}
/// View the underlying data as a subslice of the original data.
@@ -406,7 +402,7 @@ impl<'a> ::core::iter::FusedIterator for Utf8Chunks<'a> {}
/// assert_eq!(err.valid_up_to(), 6);
/// assert_eq!(err.error_len(), Some(1));
/// ```
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Utf8Error {
valid_up_to: usize,
error_len: Option<usize>,
@@ -854,13 +850,15 @@ fn is_leading_or_invalid_utf8_byte(b: u8) -> bool {
(b & 0b1100_0000) != 0b1000_0000
}
-#[cfg(test)]
+#[cfg(all(test, feature = "std"))]
mod tests {
use std::char;
- use crate::ext_slice::{ByteSlice, B};
- use crate::tests::LOSSY_TESTS;
- use crate::utf8::{self, Utf8Error};
+ use crate::{
+ ext_slice::{ByteSlice, B},
+ tests::LOSSY_TESTS,
+ utf8::{self, Utf8Error},
+ };
fn utf8e(valid_up_to: usize) -> Utf8Error {
Utf8Error { valid_up_to, error_len: None }
@@ -871,6 +869,7 @@ mod tests {
}
#[test]
+ #[cfg(not(miri))]
fn validate_all_codepoints() {
for i in 0..(0x10FFFF + 1) {
let cp = match char::from_u32(i) {