diff options
author | Haibo Huang <hhb@google.com> | 2020-11-04 16:59:50 -0800 |
---|---|---|
committer | Haibo Huang <hhb@google.com> | 2020-11-04 16:59:50 -0800 |
commit | dde9ae084db7f65519318f0733d9558688a7fbfd (patch) | |
tree | 3265acdfeb66f42a0c6871c67fee2cc3f90ee047 | |
parent | 2db690b09ca8e4f4d8f772e5d1f0969552fbfa70 (diff) | |
download | aho-corasick-dde9ae084db7f65519318f0733d9558688a7fbfd.tar.gz |
Upgrade rust/crates/aho-corasick to 0.7.15
Test: make
Change-Id: I7671aa936dc8c43d06fb7935b3313b6282ceea46
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | .github/workflows/ci.yml | 4 | ||||
-rw-r--r-- | Android.bp | 2 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | Cargo.toml.orig | 2 | ||||
-rw-r--r-- | METADATA | 8 | ||||
-rw-r--r-- | src/nfa.rs | 26 | ||||
-rw-r--r-- | src/tests.rs | 12 |
8 files changed, 41 insertions, 17 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 33e5654..ae0871a 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,5 @@ { "git": { - "sha1": "63f0b5252345fa50e490de42b3d82b5159c0e5dd" + "sha1": "3852632f10587db0ff72ef29e88d58bf305a0946" } } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06dcdd5..34ef326 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,8 +75,8 @@ jobs: # FIXME: to work around bugs in latest cross release, install master. # See: https://github.com/rust-embedded/cross/issues/357 cargo install --git https://github.com/rust-embedded/cross - echo "::set-env name=CARGO::cross" - echo "::set-env name=TARGET::--target ${{ matrix.target }}" + echo "CARGO=cross" >> $GITHUB_ENV + echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV - name: Show command used for Cargo run: | echo "cargo command is: ${{ env.CARGO }}" @@ -16,4 +16,4 @@ rust_library { } // dependent_library ["feature_list"] -// memchr-2.3.3 "std,use_std" +// memchr-2.3.4 "std,use_std" @@ -12,7 +12,7 @@ [package] name = "aho-corasick" -version = "0.7.14" +version = "0.7.15" authors = ["Andrew Gallant <jamslam@gmail.com>"] exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"] autotests = false diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 0b64728..37a6852 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "aho-corasick" -version = "0.7.14" #:version +version = "0.7.15" #:version authors = ["Andrew Gallant <jamslam@gmail.com>"] description = "Fast multiple substring searching." homepage = "https://github.com/BurntSushi/aho-corasick" @@ -7,13 +7,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/aho-corasick/aho-corasick-0.7.14.crate" + value: "https://static.crates.io/crates/aho-corasick/aho-corasick-0.7.15.crate" } - version: "0.7.14" + version: "0.7.15" license_type: NOTICE last_upgrade_date { year: 2020 - month: 10 - day: 26 + month: 11 + day: 4 } } @@ -858,10 +858,17 @@ impl<'a, S: StateID> Compiler<'a, S> { while let Some(id) = queue.pop_front() { let mut it = self.nfa.iter_transitions_mut(id); while let Some((b, next)) = it.next() { - if !seen.contains(next) { - queue.push_back(next); - seen.insert(next); + if seen.contains(next) { + // The only way to visit a duplicate state in a transition + // list is when ASCII case insensitivity is enabled. In + // this case, we want to skip it since it's redundant work. + // But it would also end up duplicating matches, which + // results in reporting duplicate matches in some cases. + // See the 'acasei010' regression test. + continue; } + queue.push_back(next); + seen.insert(next); let mut fail = it.nfa().state(id).fail; while it.nfa().state(fail).next_state(b) == fail_id() { @@ -1012,10 +1019,17 @@ impl<'a, S: StateID> Compiler<'a, S> { // Queue up the next state. let next = item.next_queued_state(it.nfa(), next_id); - if !seen.contains(next.id) { - queue.push_back(next); - seen.insert(next.id); + if seen.contains(next.id) { + // The only way to visit a duplicate state in a transition + // list is when ASCII case insensitivity is enabled. In + // this case, we want to skip it since it's redundant work. + // But it would also end up duplicating matches, which + // results in reporting duplicate matches in some cases. + // See the 'acasei010' regression test. + continue; } + queue.push_back(next); + seen.insert(next.id); // Find the failure state for next. Same as standard. let mut fail = it.nfa().state(item.id).fail; diff --git a/src/tests.rs b/src/tests.rs index 29eba1d..668fbbf 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -564,12 +564,22 @@ const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[ const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[ t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]), t!(acasei000, &["FOO", "foo"], "fOo", &[(0, 0, 3)]), + t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]), ]; /// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests. const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[ t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]), t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]), + // This is a regression test from: + // https://github.com/BurntSushi/aho-corasick/issues/68 + // Previously, it was reporting a duplicate (1, 3, 6) match. + t!( + acasei010, + &["abc", "def", "abcdef"], + "abcdef", + &[(0, 0, 3), (2, 0, 6), (1, 3, 6)] + ), ]; /// Regression tests that are applied to all Aho-Corasick combinations. @@ -1153,7 +1163,7 @@ fn regression_case_insensitive_prefilter() { // See: https://github.com/BurntSushi/aho-corasick/issues/64 // -// This occurs when the rare byte prefilter is active +// This occurs when the rare byte prefilter is active. #[test] fn regression_stream_rare_byte_prefilter() { use std::io::Read; |