1 files changed, 0 insertions, 424 deletions
diff --git a/src/search/prefilter.rs b/src/search/prefilter.rs
deleted file mode 100644
index 00e6acf..0000000
--- a/src/search/prefilter.rs
+++ /dev/null
@@ -1,424 +0,0 @@
-use core::mem;
-
-use ext_slice::ByteSlice;
-use search::byte_frequencies::BYTE_FREQUENCIES;
-
-/// PrefilterState tracks state associated with the effectiveness of a
-/// prefilter. It is used to track how many bytes, on average, are skipped by
-/// the prefilter. If this average dips below a certain threshold over time,
-/// then the state renders the prefilter inert and stops using it.
-///
-/// A prefilter state should be created for each search. (Where creating an
-/// iterator via, e.g., `find_iter`, is treated as a single search.)
-#[derive(Clone, Debug)]
-pub struct PrefilterState {
-    /// The number of skips that has been executed.
-    skips: usize,
-    /// The total number of bytes that have been skipped.
-    skipped: usize,
-    /// The maximum length of a match. This is used to help determine how many
-    /// bytes on average should be skipped in order for a prefilter to be
-    /// effective.
-    max_match_len: usize,
-    /// Once this heuristic has been deemed ineffective, it will be inert
-    /// throughout the rest of its lifetime. This serves as a cheap way to
-    /// check inertness.
-    inert: bool,
-}
-
-impl PrefilterState {
-    /// The minimum number of skip attempts to try before considering whether
-    /// a prefilter is effective or not.
-    const MIN_SKIPS: usize = 50;
-
-    /// The minimum amount of bytes that skipping must average.
-    ///
-    /// This value was chosen based on varying it and checking the bstr/find/
-    /// microbenchmarks. In particular, this can impact the
-    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's
-    /// set too low.
-    const MIN_SKIP_BYTES: usize = 8;
-
-    /// Create a fresh prefilter state.
-    pub fn new(max_match_len: usize) -> PrefilterState {
-        if max_match_len == 0 {
-            return PrefilterState::inert();
-        }
-        PrefilterState { skips: 0, skipped: 0, max_match_len, inert: false }
-    }
-
-    /// Create a fresh prefilter state that is always inert.
-    fn inert() -> PrefilterState {
-        PrefilterState { skips: 0, skipped: 0, max_match_len: 0, inert: true }
-    }
-
-    /// Update this state with the number of bytes skipped on the last
-    /// invocation of the prefilter.
-    #[inline]
-    pub fn update(&mut self, skipped: usize) {
-        self.skips += 1;
-        self.skipped += skipped;
-    }
-
-    /// Return true if and only if this state indicates that a prefilter is
-    /// still effective.
-    #[inline]
-    pub fn is_effective(&mut self) -> bool {
-        if self.inert {
-            return false;
-        }
-        if self.skips < PrefilterState::MIN_SKIPS {
-            return true;
-        }
-        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips {
-            return true;
-        }
-
-        // We're inert.
-        self.inert = true;
-        false
-    }
-}
-
-/// A heuristic frequency based prefilter for searching a single needle.
-///
-/// This prefilter attempts to pick out the byte in a needle that is predicted
-/// to occur least frequently, and search for that using fast vectorized
-/// routines. If a rare enough byte could not be found, then this prefilter's
-/// constructors will return `None`.
-///
-/// This can be combined with `PrefilterState` to dynamically render this
-/// prefilter inert if it proves to ineffective.
-#[derive(Clone, Debug)]
-pub struct Freqy {
-    /// Whether this prefilter should be used or not.
-    inert: bool,
-    /// The length of the needle we're searching for.
-    needle_len: usize,
-    /// The rarest byte in the needle, according to pre-computed frequency
-    /// analysis.
-    rare1: u8,
-    /// The leftmost offset of the rarest byte in the needle.
-    rare1i: usize,
-    /// The second rarest byte in the needle, according to pre-computed
-    /// frequency analysis. (This may be equivalent to the rarest byte.)
-    ///
-    /// The second rarest byte is used as a type of guard for quickly detecting
-    /// a mismatch after memchr locates an instance of the rarest byte. This
-    /// is a hedge against pathological cases where the pre-computed frequency
-    /// analysis may be off. (But of course, does not prevent *all*
-    /// pathological cases.)
-    rare2: u8,
-    /// The leftmost offset of the second rarest byte in the needle.
-    rare2i: usize,
-}
-
-impl Freqy {
-    /// The maximum frequency rank permitted. If the rarest byte in the needle
-    /// has a frequency rank above this value, then Freqy is not used.
-    const MAX_RANK: usize = 200;
-
-    /// Return a fresh prefilter state that can be used with this prefilter. A
-    /// prefilter state is used to track the effectiveness of a prefilter for
-    /// speeding up searches. Therefore, the prefilter state should generally
-    /// be reused on subsequent searches (such as in an iterator). For searches
-    /// on a different haystack, then a new prefilter state should be used.
-    pub fn prefilter_state(&self) -> PrefilterState {
-        if self.inert {
-            PrefilterState::inert()
-        } else {
-            PrefilterState::new(self.needle_len)
-        }
-    }
-
-    /// Returns a valid but inert prefilter. This is valid for both the forward
-    /// and reverse direction.
-    ///
-    /// It is never correct to use an inert prefilter. The results of finding
-    /// the next (or previous) candidate are unspecified.
-    fn inert() -> Freqy {
-        Freqy {
-            inert: true,
-            needle_len: 0,
-            rare1: 0,
-            rare1i: 0,
-            rare2: 0,
-            rare2i: 0,
-        }
-    }
-
-    /// Return search info for the given needle in the forward direction.
-    pub fn forward(needle: &[u8]) -> Freqy {
-        if needle.is_empty() {
-            return Freqy::inert();
-        }
-
-        // Find the rarest two bytes. Try to make them distinct (but it's not
-        // required).
-        let (mut rare1, mut rare1i) = (needle[0], 0);
-        let (mut rare2, mut rare2i) = (needle[0], 0);
-        if needle.len() >= 2 {
-            rare2 = needle[1];
-            rare2i = 1;
-        }
-        if Freqy::rank(rare2) < Freqy::rank(rare1) {
-            mem::swap(&mut rare1, &mut rare2);
-            mem::swap(&mut rare1i, &mut rare2i);
-        }
-        for (i, b) in needle.bytes().enumerate().skip(2) {
-            if Freqy::rank(b) < Freqy::rank(rare1) {
-                rare2 = rare1;
-                rare2i = rare1i;
-                rare1 = b;
-                rare1i = i;
-            } else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) {
-                rare2 = b;
-                rare2i = i;
-            }
-        }
-        if Freqy::rank(rare1) > Freqy::MAX_RANK {
-            return Freqy::inert();
-        }
-        let needle_len = needle.len();
-        Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i }
-    }
-
-    /// Return search info for the given needle in the reverse direction.
-    pub fn reverse(needle: &[u8]) -> Freqy {
-        if needle.is_empty() {
-            return Freqy::inert();
-        }
-
-        // Find the rarest two bytes. Try to make them distinct (but it's not
-        // required). In reverse, the offsets correspond to the number of bytes
-        // from the end of the needle. So `0` is the last byte in the needle.
-        let (mut rare1i, mut rare2i) = (0, 0);
-        if needle.len() >= 2 {
-            rare2i += 1;
-        }
-        let mut rare1 = needle[needle.len() - rare1i - 1];
-        let mut rare2 = needle[needle.len() - rare2i - 1];
-        if Freqy::rank(rare2) < Freqy::rank(rare1) {
-            mem::swap(&mut rare1, &mut rare2);
-            mem::swap(&mut rare1i, &mut rare2i);
-        }
-        for (i, b) in needle.bytes().rev().enumerate().skip(2) {
-            if Freqy::rank(b) < Freqy::rank(rare1) {
-                rare2 = rare1;
-                rare2i = rare1i;
-                rare1 = b;
-                rare1i = i;
-            } else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) {
-                rare2 = b;
-                rare2i = i;
-            }
-        }
-        if Freqy::rank(rare1) > Freqy::MAX_RANK {
-            return Freqy::inert();
-        }
-        let needle_len = needle.len();
-        Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i }
-    }
-
-    /// Look for a possible occurrence of needle. The position returned
-    /// corresponds to the beginning of the occurrence, if one exists.
-    ///
-    /// Callers may assume that this never returns false negatives (i.e., it
-    /// never misses an actual occurrence), but must check that the returned
-    /// position corresponds to a match. That is, it can return false
-    /// positives.
-    ///
-    /// This should only be used when Freqy is constructed for forward
-    /// searching.
-    pub fn find_candidate(
-        &self,
-        prestate: &mut PrefilterState,
-        haystack: &[u8],
-    ) -> Option<usize> {
-        debug_assert!(!self.inert);
-
-        let mut i = 0;
-        while prestate.is_effective() {
-            // Use a fast vectorized implementation to skip to the next
-            // occurrence of the rarest byte (heuristically chosen) in the
-            // needle.
-            i += match haystack[i..].find_byte(self.rare1) {
-                None => return None,
-                Some(found) => {
-                    prestate.update(found);
-                    found
-                }
-            };
-
-            // If we can't align our first match with the haystack, then a
-            // match is impossible.
-            if i < self.rare1i {
-                i += 1;
-                continue;
-            }
-
-            // Align our rare2 byte with the haystack. A mismatch means that
-            // a match is impossible.
-            let aligned_rare2i = i - self.rare1i + self.rare2i;
-            if haystack.get(aligned_rare2i) != Some(&self.rare2) {
-                i += 1;
-                continue;
-            }
-
-            // We've done what we can. There might be a match here.
-            return Some(i - self.rare1i);
-        }
-        // The only way we get here is if we believe our skipping heuristic
-        // has become ineffective. We're allowed to return false positives,
-        // so return the position at which we advanced to, aligned to the
-        // haystack.
-        Some(i.saturating_sub(self.rare1i))
-    }
-
-    /// Look for a possible occurrence of needle, in reverse, starting from the
-    /// end of the given haystack. The position returned corresponds to the
-    /// position immediately after the end of the occurrence, if one exists.
-    ///
-    /// Callers may assume that this never returns false negatives (i.e., it
-    /// never misses an actual occurrence), but must check that the returned
-    /// position corresponds to a match. That is, it can return false
-    /// positives.
-    ///
-    /// This should only be used when Freqy is constructed for reverse
-    /// searching.
-    pub fn rfind_candidate(
-        &self,
-        prestate: &mut PrefilterState,
-        haystack: &[u8],
-    ) -> Option<usize> {
-        debug_assert!(!self.inert);
-
-        let mut i = haystack.len();
-        while prestate.is_effective() {
-            // Use a fast vectorized implementation to skip to the next
-            // occurrence of the rarest byte (heuristically chosen) in the
-            // needle.
-            i = match haystack[..i].rfind_byte(self.rare1) {
-                None => return None,
-                Some(found) => {
-                    prestate.update(i - found);
-                    found
-                }
-            };
-
-            // If we can't align our first match with the haystack, then a
-            // match is impossible.
-            if i + self.rare1i + 1 > haystack.len() {
-                continue;
-            }
-
-            // Align our rare2 byte with the haystack. A mismatch means that
-            // a match is impossible.
-            let aligned = match (i + self.rare1i).checked_sub(self.rare2i) {
-                None => continue,
-                Some(aligned) => aligned,
-            };
-            if haystack.get(aligned) != Some(&self.rare2) {
-                continue;
-            }
-
-            // We've done what we can. There might be a match here.
-            return Some(i + self.rare1i + 1);
-        }
-        // The only way we get here is if we believe our skipping heuristic
-        // has become ineffective. We're allowed to return false positives,
-        // so return the position at which we advanced to, aligned to the
-        // haystack.
-        Some(i + self.rare1i + 1)
-    }
-
-    /// Return the heuristical frequency rank of the given byte. A lower rank
-    /// means the byte is believed to occur less frequently.
-    fn rank(b: u8) -> usize {
-        BYTE_FREQUENCIES[b as usize] as usize
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use ext_slice::B;
-
-    #[test]
-    fn freqy_forward() {
-        // N.B. We sometimes use uppercase here since that mostly ensures freqy
-        // will be constructable. Lowercase letters may be too common for freqy
-        // to work.
-
-        let s = Freqy::forward(B("BAR"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(0), s.find_candidate(&mut pre, B("BARFOO")));
-
-        let s = Freqy::forward(B("BAR"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(3), s.find_candidate(&mut pre, B("FOOBAR")));
-
-        let s = Freqy::forward(B("zyzy"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(0), s.find_candidate(&mut pre, B("zyzz")));
-
-        let s = Freqy::forward(B("zyzy"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(2), s.find_candidate(&mut pre, B("zzzy")));
-
-        let s = Freqy::forward(B("zyzy"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(None, s.find_candidate(&mut pre, B("zazb")));
-
-        let s = Freqy::forward(B("yzyz"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(0), s.find_candidate(&mut pre, B("yzyy")));
-
-        let s = Freqy::forward(B("yzyz"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(2), s.find_candidate(&mut pre, B("yyyz")));
-
-        let s = Freqy::forward(B("yzyz"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(None, s.find_candidate(&mut pre, B("yayb")));
-    }
-
-    #[test]
-    fn freqy_reverse() {
-        // N.B. We sometimes use uppercase here since that mostly ensures freqy
-        // will be constructable. Lowercase letters may be too common for freqy
-        // to work.
-
-        let s = Freqy::reverse(B("BAR"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(3), s.rfind_candidate(&mut pre, B("BARFOO")));
-
-        let s = Freqy::reverse(B("BAR"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(6), s.rfind_candidate(&mut pre, B("FOOBAR")));
-
-        let s = Freqy::reverse(B("zyzy"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("zyzz")));
-
-        let s = Freqy::reverse(B("zyzy"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("zzzy")));
-
-        let s = Freqy::reverse(B("zyzy"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(None, s.rfind_candidate(&mut pre, B("zazb")));
-
-        let s = Freqy::reverse(B("yzyz"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("yzyy")));
-
-        let s = Freqy::reverse(B("yzyz"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("yyyz")));
-
-        let s = Freqy::reverse(B("yzyz"));
-        let mut pre = s.prefilter_state();
-        assert_eq!(None, s.rfind_candidate(&mut pre, B("yayb")));
-    }
-}