From 8249a3d7e19156f08ae81ef676b8fe0350d8ea27 Mon Sep 17 00:00:00 2001 From: Joel Galenson Date: Mon, 21 Jun 2021 14:01:00 -0700 Subject: Upgrade rust/crates/regex-automata to 0.1.10 Test: make Change-Id: Iba14ce63ff5be85b611cd1768692d8fb6a2aa6e8 --- .cargo_vcs_info.json | 2 +- .github/workflows/ci.yml | 6 ++-- Android.bp | 5 +--- Cargo.toml | 6 +--- Cargo.toml.orig | 3 +- METADATA | 8 ++--- README.md | 11 +++---- TEST_MAPPING | 11 +++++++ src/byteorder.rs | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ src/determinize.rs | 3 +- src/lib.rs | 2 +- src/sparse_set.rs | 2 +- 12 files changed, 108 insertions(+), 27 deletions(-) create mode 100644 TEST_MAPPING create mode 100644 src/byteorder.rs diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 7aad2fd..041c1cf 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,5 @@ { "git": { - "sha1": "4e0e8ec599e92b115c53ed8d760f7c38bf91891f" + "sha1": "dd59a74be412e349bf6df528a216a13c2cf57262" } } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc98cce..8bdc61a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: include: - build: pinned os: ubuntu-18.04 - rust: 1.28.0 + rust: 1.41.1 - build: stable os: ubuntu-18.04 rust: stable @@ -82,8 +82,8 @@ jobs: # FIXME: to work around bugs in latest cross release, install master. # See: https://github.com/rust-embedded/cross/issues/357 cargo install --git https://github.com/rust-embedded/cross - echo "::set-env name=CARGO::cross" - echo "::set-env name=TARGET::--target ${{ matrix.target }}" + echo "CARGO=cross" >> $GITHUB_ENV + echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV - name: Show command used for Cargo run: | diff --git a/Android.bp b/Android.bp index 7e00d34..3a11a22 100644 --- a/Android.bp +++ b/Android.bp @@ -42,7 +42,6 @@ license { rust_library { name: "libregex_automata", - // has rustc warnings host_supported: true, crate_name: "regex_automata", srcs: ["src/lib.rs"], @@ -57,11 +56,9 @@ rust_library { "-C opt-level=3", ], rustlibs: [ - "libbyteorder", "libregex_syntax", ], } // dependent_library ["feature_list"] -// byteorder-1.4.2 -// regex-syntax-0.6.22 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment" +// regex-syntax-0.6.25 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment" diff --git a/Cargo.toml b/Cargo.toml index 7ef891a..b4fcd7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ [package] name = "regex-automata" -version = "0.1.9" +version = "0.1.10" authors = ["Andrew Gallant "] exclude = ["/.travis.yml", "/appveyor.yml", "/ci/*", "/scripts/*", "/regex-automata-debug"] autoexamples = false @@ -45,10 +45,6 @@ bench = false [[test]] name = "default" path = "tests/tests.rs" -[dependencies.byteorder] -version = "1.2.7" -default-features = false - [dependencies.fst] version = "0.4.0" optional = true diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 2f5eb41..ce1c704 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "regex-automata" -version = "0.1.9" #:version +version = "0.1.10" #:version authors = ["Andrew Gallant "] description = "Automata construction and matching using regular expressions." documentation = "https://docs.rs/regex-automata" @@ -40,7 +40,6 @@ std = ["regex-syntax"] transducer = ["std", "fst"] [dependencies] -byteorder = { version = "1.2.7", default-features = false } fst = { version = "0.4.0", optional = true } regex-syntax = { version = "0.6.16", optional = true } diff --git a/METADATA b/METADATA index 421e63a..c821873 100644 --- a/METADATA +++ b/METADATA @@ -7,13 +7,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/regex-automata/regex-automata-0.1.9.crate" + value: "https://static.crates.io/crates/regex-automata/regex-automata-0.1.10.crate" } - version: "0.1.9" + version: "0.1.10" license_type: NOTICE last_upgrade_date { - year: 2020 - month: 12 + year: 2021 + month: 6 day: 21 } } diff --git a/README.md b/README.md index 2acf065..8eaf03f 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,10 @@ configuring the best space vs time trade off for your use case and provides support for cheap deserialization of automata for use in `no_std` environments. [![Build status](https://github.com/BurntSushi/regex-automata/workflows/ci/badge.svg)](https://github.com/BurntSushi/regex-automata/actions) -[![](http://meritbadge.herokuapp.com/regex-automata)](https://crates.io/crates/regex-automata) +[![on crates.io](https://meritbadge.herokuapp.com/regex-automata)](https://crates.io/crates/regex-automata) +![Minimum Supported Rust Version 1.41](https://img.shields.io/badge/rustc-1.41-green) -Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). +Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). ### Documentation @@ -182,10 +183,10 @@ With some of the downsides out of the way, here are some positive differences: * Stretch goal: support capturing groups by implementing "tagged" DFA (transducers). Laurikari's paper is the usual reference here, but Trofimovich has a much more thorough treatment here: - http://re2c.org/2017_trofimovich_tagged_deterministic_finite_automata_with_lookahead.pdf + https://re2c.org/2017_trofimovich_tagged_deterministic_finite_automata_with_lookahead.pdf I've only read the paper once. I suspect it will require at least a few more read throughs before I understand it. - See also: http://re2c.org/ + See also: https://re2c.org * Possibly less ambitious goal: can we select a portion of Trofimovich's work to make small fixed length look-around work? It would be really nice to support ^, $ and \b, especially the Unicode variant of \b and CRLF aware $. @@ -219,4 +220,4 @@ With some of the downsides out of the way, here are some positive differences: If we could know whether a regex will exhibit state explosion or not, then we could make an intelligent decision about whether to ahead-of-time compile a DFA. - See: https://www.researchgate.net/profile/XU_Shutu/publication/229032602_Characterization_of_a_global_germplasm_collection_and_its_potential_utilization_for_analysis_of_complex_quantitative_traits_in_maize/links/02bfe50f914d04c837000000.pdf + See: https://www.researchgate.net/profile/Xu-Shutu/publication/229032602_Characterization_of_a_global_germplasm_collection_and_its_potential_utilization_for_analysis_of_complex_quantitative_traits_in_maize/links/02bfe50f914d04c837000000/Characterization-of-a-global-germplasm-collection-and-its-potential-utilization-for-analysis-of-complex-quantitative-traits-in-maize.pdf diff --git a/TEST_MAPPING b/TEST_MAPPING new file mode 100644 index 0000000..19817ee --- /dev/null +++ b/TEST_MAPPING @@ -0,0 +1,11 @@ +// Generated by update_crate_tests.py for tests that depend on this crate. +{ + "presubmit": [ + { + "name": "unicode-xid_device_test_src_lib" + }, + { + "name": "unicode-xid_device_test_tests_exhaustive_tests" + } + ] +} diff --git a/src/byteorder.rs b/src/byteorder.rs new file mode 100644 index 0000000..e909f93 --- /dev/null +++ b/src/byteorder.rs @@ -0,0 +1,76 @@ +use core::convert::TryInto; + +pub trait ByteOrder { + fn read_u16(buf: &[u8]) -> u16; + fn read_u32(buf: &[u8]) -> u32; + fn read_u64(buf: &[u8]) -> u64; + fn read_uint(buf: &[u8], nbytes: usize) -> u64; + fn write_u16(buf: &mut [u8], n: u16); + fn write_u32(buf: &mut [u8], n: u32); + fn write_u64(buf: &mut [u8], n: u64); + fn write_uint(buf: &mut [u8], n: u64, nbytes: usize); +} + +pub enum BigEndian {} +pub enum LittleEndian {} +pub enum NativeEndian {} + +macro_rules! impl_endian { + ($t:ty, $from_endian:ident, $to_endian:ident) => { + impl ByteOrder for $t { + #[inline] + fn read_u16(buf: &[u8]) -> u16 { + u16::$from_endian(buf[0..2].try_into().unwrap()) + } + + #[inline] + fn read_u32(buf: &[u8]) -> u32 { + u32::$from_endian(buf[0..4].try_into().unwrap()) + } + + #[inline] + fn read_u64(buf: &[u8]) -> u64 { + u64::$from_endian(buf[0..8].try_into().unwrap()) + } + + #[inline] + fn read_uint(buf: &[u8], nbytes: usize) -> u64 { + let mut dst = [0u8; 8]; + dst[..nbytes].copy_from_slice(&buf[..nbytes]); + u64::$from_endian(dst) + } + + #[inline] + fn write_u16(buf: &mut [u8], n: u16) { + buf[0..2].copy_from_slice(&n.$to_endian()[..]); + } + + #[inline] + fn write_u32(buf: &mut [u8], n: u32) { + buf[0..4].copy_from_slice(&n.$to_endian()[..]); + } + + #[inline] + fn write_u64(buf: &mut [u8], n: u64) { + buf[0..8].copy_from_slice(&n.$to_endian()[..]); + } + + #[inline] + fn write_uint(buf: &mut [u8], n: u64, nbytes: usize) { + buf[..nbytes].copy_from_slice(&n.$to_endian()[..nbytes]); + } + } + }; +} + +impl_endian! { + BigEndian, from_be_bytes, to_be_bytes +} + +impl_endian! { + LittleEndian, from_le_bytes, to_le_bytes +} + +impl_endian! { + NativeEndian, from_ne_bytes, to_ne_bytes +} diff --git a/src/determinize.rs b/src/determinize.rs index f300316..cf0c285 100644 --- a/src/determinize.rs +++ b/src/determinize.rs @@ -148,7 +148,8 @@ impl<'a, S: StateID> Determinizer<'a, S> { if let Some(&cached_id) = self.cache.get(&state) { // Since we have a cached state, put the constructed state's // memory back into our scratch space, so that it can be reused. - mem::replace(&mut self.scratch_nfa_states, state.nfa_states); + let _ = + mem::replace(&mut self.scratch_nfa_states, state.nfa_states); return Ok((cached_id, false)); } // Nothing was in the cache, so add this state to the cache. diff --git a/src/lib.rs b/src/lib.rs index 4d3e9c1..7894ecc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -290,7 +290,6 @@ extern crate core; #[cfg(all(test, feature = "transducer"))] extern crate bstr; -extern crate byteorder; #[cfg(feature = "transducer")] extern crate fst; #[cfg(feature = "std")] @@ -306,6 +305,7 @@ pub use regex::RegexBuilder; pub use sparse::SparseDFA; pub use state_id::StateID; +mod byteorder; mod classes; #[path = "dense.rs"] mod dense_imp; diff --git a/src/sparse_set.rs b/src/sparse_set.rs index 6f145ba..56743b0 100644 --- a/src/sparse_set.rs +++ b/src/sparse_set.rs @@ -6,7 +6,7 @@ use std::slice; /// entire set can also be done in constant time. Iteration yields elements /// in the order in which they were inserted. /// -/// The data structure is based on: http://research.swtch.com/sparse +/// The data structure is based on: https://research.swtch.com/sparse /// Note though that we don't actually use uninitialized memory. We generally /// reuse sparse sets, so the initial allocation cost is bareable. However, its /// other properties listed above are extremely useful. -- cgit v1.2.3