diff options
Diffstat (limited to 'src/ext_slice.rs')
-rw-r--r-- | src/ext_slice.rs | 150 |
1 files changed, 55 insertions, 95 deletions
diff --git a/src/ext_slice.rs b/src/ext_slice.rs index fa08190..0cc73af 100644 --- a/src/ext_slice.rs +++ b/src/ext_slice.rs @@ -5,22 +5,21 @@ use std::ffi::OsStr; #[cfg(feature = "std")] use std::path::Path; -use core::{cmp, iter, ops, ptr, slice, str}; -use memchr::{memchr, memrchr}; +use core::{iter, ops, ptr, slice, str}; +use memchr::{memchr, memmem, memrchr}; -use ascii; -use bstr::BStr; -use byteset; +use crate::ascii; +use crate::bstr::BStr; +use crate::byteset; #[cfg(feature = "std")] -use ext_vec::ByteVec; -use search::{PrefilterState, TwoWay}; +use crate::ext_vec::ByteVec; #[cfg(feature = "unicode")] -use unicode::{ +use crate::unicode::{ whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes, SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks, }; -use utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error}; +use crate::utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error}; /// A short-hand constructor for building a `&[u8]`. /// @@ -344,7 +343,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "std")] #[inline] - fn to_str_lossy(&self) -> Cow<str> { + fn to_str_lossy(&self) -> Cow<'_, str> { match utf8::validate(self.as_bytes()) { Ok(()) => { // SAFETY: This is safe because of the guarantees provided by @@ -488,10 +487,10 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "std")] #[inline] - fn to_os_str_lossy(&self) -> Cow<OsStr> { + fn to_os_str_lossy(&self) -> Cow<'_, OsStr> { #[cfg(unix)] #[inline] - fn imp(bytes: &[u8]) -> Cow<OsStr> { + fn imp(bytes: &[u8]) -> Cow<'_, OsStr> { use std::os::unix::ffi::OsStrExt; Cow::Borrowed(OsStr::from_bytes(bytes)) @@ -559,7 +558,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "std")] #[inline] - fn to_path_lossy(&self) -> Cow<Path> { + fn to_path_lossy(&self) -> Cow<'_, Path> { use std::path::PathBuf; match self.to_os_str_lossy() { @@ -1067,7 +1066,7 @@ pub trait ByteSlice: Sealed { /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count()); /// ``` #[inline] - fn fields(&self) -> Fields { + fn fields(&self) -> Fields<'_> { Fields::new(self.as_bytes()) } @@ -1099,7 +1098,7 @@ pub trait ByteSlice: Sealed { /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count()); /// ``` #[inline] - fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<F> { + fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> { FieldsWith::new(self.as_bytes(), f) } @@ -1619,7 +1618,7 @@ pub trait ByteSlice: Sealed { /// assert_eq!(bytes, bs); /// ``` #[inline] - fn bytes(&self) -> Bytes { + fn bytes(&self) -> Bytes<'_> { Bytes { it: self.as_bytes().iter() } } @@ -1649,7 +1648,7 @@ pub trait ByteSlice: Sealed { /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars); /// ``` #[inline] - fn chars(&self) -> Chars { + fn chars(&self) -> Chars<'_> { Chars::new(self.as_bytes()) } @@ -1704,7 +1703,7 @@ pub trait ByteSlice: Sealed { /// ]); /// ``` #[inline] - fn char_indices(&self) -> CharIndices { + fn char_indices(&self) -> CharIndices<'_> { CharIndices::new(self.as_bytes()) } @@ -1741,7 +1740,7 @@ pub trait ByteSlice: Sealed { /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]); /// ``` #[inline] - fn utf8_chunks(&self) -> Utf8Chunks { + fn utf8_chunks(&self) -> Utf8Chunks<'_> { Utf8Chunks { bytes: self.as_bytes() } } @@ -1773,7 +1772,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn graphemes(&self) -> Graphemes { + fn graphemes(&self) -> Graphemes<'_> { Graphemes::new(self.as_bytes()) } @@ -1817,7 +1816,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn grapheme_indices(&self) -> GraphemeIndices { + fn grapheme_indices(&self) -> GraphemeIndices<'_> { GraphemeIndices::new(self.as_bytes()) } @@ -1853,7 +1852,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn words(&self) -> Words { + fn words(&self) -> Words<'_> { Words::new(self.as_bytes()) } @@ -1891,7 +1890,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn word_indices(&self) -> WordIndices { + fn word_indices(&self) -> WordIndices<'_> { WordIndices::new(self.as_bytes()) } @@ -1921,7 +1920,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn words_with_breaks(&self) -> WordsWithBreaks { + fn words_with_breaks(&self) -> WordsWithBreaks<'_> { WordsWithBreaks::new(self.as_bytes()) } @@ -1958,7 +1957,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn words_with_break_indices(&self) -> WordsWithBreakIndices { + fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> { WordsWithBreakIndices::new(self.as_bytes()) } @@ -1990,7 +1989,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn sentences(&self) -> Sentences { + fn sentences(&self) -> Sentences<'_> { Sentences::new(self.as_bytes()) } @@ -2024,7 +2023,7 @@ pub trait ByteSlice: Sealed { /// ``` #[cfg(feature = "unicode")] #[inline] - fn sentence_indices(&self) -> SentenceIndices { + fn sentence_indices(&self) -> SentenceIndices<'_> { SentenceIndices::new(self.as_bytes()) } @@ -2055,7 +2054,7 @@ pub trait ByteSlice: Sealed { /// ]); /// ``` #[inline] - fn lines(&self) -> Lines { + fn lines(&self) -> Lines<'_> { Lines::new(self.as_bytes()) } @@ -2099,7 +2098,7 @@ pub trait ByteSlice: Sealed { /// ]); /// ``` #[inline] - fn lines_with_terminator(&self) -> LinesWithTerminator { + fn lines_with_terminator(&self) -> LinesWithTerminator<'_> { LinesWithTerminator::new(self.as_bytes()) } @@ -2789,7 +2788,7 @@ pub trait ByteSlice: Sealed { #[cfg(feature = "unicode")] #[inline] fn reverse_graphemes(&mut self) { - use unicode::decode_grapheme; + use crate::unicode::decode_grapheme; let mut i = 0; loop { @@ -2986,15 +2985,13 @@ pub trait ByteSlice: Sealed { /// version which permits building a `Finder` that is not connected to the /// lifetime of its needle. #[derive(Clone, Debug)] -pub struct Finder<'a> { - searcher: TwoWay<'a>, -} +pub struct Finder<'a>(memmem::Finder<'a>); impl<'a> Finder<'a> { /// Create a new finder for the given needle. #[inline] pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> { - Finder { searcher: TwoWay::forward(needle.as_ref()) } + Finder(memmem::Finder::new(needle.as_ref())) } /// Convert this finder into its owned variant, such that it no longer @@ -3007,7 +3004,7 @@ impl<'a> Finder<'a> { #[cfg(feature = "std")] #[inline] pub fn into_owned(self) -> Finder<'static> { - Finder { searcher: self.searcher.into_owned() } + Finder(self.0.into_owned()) } /// Returns the needle that this finder searches for. @@ -3018,7 +3015,7 @@ impl<'a> Finder<'a> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.0.needle() } /// Returns the index of the first occurrence of this needle in the given @@ -3050,7 +3047,7 @@ impl<'a> Finder<'a> { /// ``` #[inline] pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { - self.searcher.find(haystack.as_ref()) + self.0.find(haystack.as_ref()) } } @@ -3071,15 +3068,13 @@ impl<'a> Finder<'a> { /// version which permits building a `FinderReverse` that is not connected to /// the lifetime of its needle. #[derive(Clone, Debug)] -pub struct FinderReverse<'a> { - searcher: TwoWay<'a>, -} +pub struct FinderReverse<'a>(memmem::FinderRev<'a>); impl<'a> FinderReverse<'a> { /// Create a new reverse finder for the given needle. #[inline] pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> { - FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) } + FinderReverse(memmem::FinderRev::new(needle.as_ref())) } /// Convert this finder into its owned variant, such that it no longer @@ -3092,7 +3087,7 @@ impl<'a> FinderReverse<'a> { #[cfg(feature = "std")] #[inline] pub fn into_owned(self) -> FinderReverse<'static> { - FinderReverse { searcher: self.searcher.into_owned() } + FinderReverse(self.0.into_owned()) } /// Returns the needle that this finder searches for. @@ -3103,7 +3098,7 @@ impl<'a> FinderReverse<'a> { /// the needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.0.needle() } /// Returns the index of the last occurrence of this needle in the given @@ -3135,7 +3130,7 @@ impl<'a> FinderReverse<'a> { /// ``` #[inline] pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { - self.searcher.rfind(haystack.as_ref()) + self.0.rfind(haystack.as_ref()) } } @@ -3147,17 +3142,14 @@ impl<'a> FinderReverse<'a> { /// byte string being looked for. #[derive(Debug)] pub struct Find<'a> { + it: memmem::FindIter<'a, 'a>, haystack: &'a [u8], - prestate: PrefilterState, - searcher: TwoWay<'a>, - pos: usize, + needle: &'a [u8], } impl<'a> Find<'a> { fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> { - let searcher = TwoWay::forward(needle); - let prestate = searcher.prefilter_state(); - Find { haystack, prestate, searcher, pos: 0 } + Find { it: memmem::find_iter(haystack, needle), haystack, needle } } } @@ -3166,20 +3158,7 @@ impl<'a> Iterator for Find<'a> { #[inline] fn next(&mut self) -> Option<usize> { - if self.pos > self.haystack.len() { - return None; - } - let result = self - .searcher - .find_with(&mut self.prestate, &self.haystack[self.pos..]); - match result { - None => None, - Some(i) => { - let pos = self.pos + i; - self.pos = pos + cmp::max(1, self.searcher.needle().len()); - Some(pos) - } - } + self.it.next() } } @@ -3191,20 +3170,18 @@ impl<'a> Iterator for Find<'a> { /// byte string being looked for. #[derive(Debug)] pub struct FindReverse<'a> { + it: memmem::FindRevIter<'a, 'a>, haystack: &'a [u8], - prestate: PrefilterState, - searcher: TwoWay<'a>, - /// When searching with an empty needle, this gets set to `None` after - /// we've yielded the last element at `0`. - pos: Option<usize>, + needle: &'a [u8], } impl<'a> FindReverse<'a> { fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> { - let searcher = TwoWay::reverse(needle); - let prestate = searcher.prefilter_state(); - let pos = Some(haystack.len()); - FindReverse { haystack, prestate, searcher, pos } + FindReverse { + it: memmem::rfind_iter(haystack, needle), + haystack, + needle, + } } fn haystack(&self) -> &'a [u8] { @@ -3212,7 +3189,7 @@ impl<'a> FindReverse<'a> { } fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle } } @@ -3221,24 +3198,7 @@ impl<'a> Iterator for FindReverse<'a> { #[inline] fn next(&mut self) -> Option<usize> { - let pos = match self.pos { - None => return None, - Some(pos) => pos, - }; - let result = self - .searcher - .rfind_with(&mut self.prestate, &self.haystack[..pos]); - match result { - None => None, - Some(i) => { - if pos == i { - self.pos = pos.checked_sub(1); - } else { - self.pos = Some(i); - } - Some(i) - } - } + self.it.next() } } @@ -3398,7 +3358,7 @@ impl<'a> Iterator for Split<'a> { match self.finder.next() { Some(start) => { let next = &haystack[self.last..start]; - self.last = start + self.finder.searcher.needle().len(); + self.last = start + self.finder.needle.len(); Some(next) } None => { @@ -3633,8 +3593,8 @@ impl<'a> Iterator for LinesWithTerminator<'a> { #[cfg(test)] mod tests { - use ext_slice::{ByteSlice, B}; - use tests::LOSSY_TESTS; + use crate::ext_slice::{ByteSlice, B}; + use crate::tests::LOSSY_TESTS; #[test] fn to_str_lossy() { |