diff options
author | Chih-Hung Hsieh <chh@google.com> | 2020-10-26 23:34:49 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2020-10-26 23:34:49 +0000 |
commit | 910574851b504a9aff6861089887f234df2bdf88 (patch) | |
tree | e873b7eb9bb377a84845126bfbbbc5301481d3bc | |
parent | ff6b1ad934393f85da3f7edc647555ccbec0205d (diff) | |
parent | 4ba5657398b12fa17ad53e750a42a9b67c2de8af (diff) | |
download | regex-910574851b504a9aff6861089887f234df2bdf88.tar.gz |
Upgrade rust/crates/regex to 1.4.1 am: 849e4457e7 am: 7767e43fa4 am: 4ba5657398
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/regex/+/1474897
Change-Id: I21593a96bd772c83839151aa230434c2ff6c168e
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | Android.bp | 4 | ||||
-rw-r--r-- | CHANGELOG.md | 47 | ||||
-rw-r--r-- | Cargo.toml | 4 | ||||
-rw-r--r-- | Cargo.toml.orig | 4 | ||||
-rw-r--r-- | METADATA | 8 | ||||
-rw-r--r-- | src/compile.rs | 2 | ||||
-rw-r--r-- | src/dfa.rs | 12 | ||||
-rw-r--r-- | src/expand.rs | 50 | ||||
-rw-r--r-- | src/lib.rs | 8 | ||||
-rw-r--r-- | src/pikevm.rs | 2 | ||||
-rw-r--r-- | src/re_bytes.rs | 19 | ||||
-rw-r--r-- | src/re_set.rs | 18 | ||||
-rw-r--r-- | src/re_trait.rs | 1 | ||||
-rw-r--r-- | src/re_unicode.rs | 21 | ||||
-rw-r--r-- | tests/api.rs | 12 | ||||
-rw-r--r-- | tests/set.rs | 11 | ||||
-rw-r--r-- | tests/unicode.rs | 3 |
18 files changed, 173 insertions, 55 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index bd8a005..407f0d7 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,5 @@ { "git": { - "sha1": "691606773f525be32a59a0c28eae203a79663706" + "sha1": "d5bf98f293b48174d5378471d01c2e0ef271bbbc" } } @@ -34,8 +34,8 @@ rust_library_host { } // dependent_library ["feature_list"] -// aho-corasick-0.7.13 "default,std" +// aho-corasick-0.7.14 "default,std" // lazy_static-1.4.0 // memchr-2.3.3 "default,std,use_std" -// regex-syntax-0.6.18 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment" +// regex-syntax-0.6.20 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment" // thread_local-1.0.1 diff --git a/CHANGELOG.md b/CHANGELOG.md index c7e528d..82aa089 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,44 @@ +1.4.1 (2020-10-13) +================== +This is a small bug fix release that makes `\p{cf}` work. Previously, it would +report "property not found" even though `cf` is a valid abbreviation for the +`Format` general category. + +* [BUG #719](https://github.com/rust-lang/regex/issues/719): + Fixes bug that prevented `\p{cf}` from working. + + +1.4.0 (2020-10-11) +================== +This releases has a few minor documentation fixes as well as some very minor +API additions. The MSRV remains at Rust 1.28 for now, but this is intended to +increase to at least Rust 1.41.1 soon. + +This release also adds support for OSS-Fuzz. Kudos to +[@DavidKorczynski](https://github.com/DavidKorczynski) +for doing the heavy lifting for that! + +New features: + +* [FEATURE #649](https://github.com/rust-lang/regex/issues/649): + Support `[`, `]` and `.` in capture group names. +* [FEATURE #687](https://github.com/rust-lang/regex/issues/687): + Add `is_empty` predicate to `RegexSet`. +* [FEATURE #689](https://github.com/rust-lang/regex/issues/689): + Implement `Clone` for `SubCaptureMatches`. +* [FEATURE #715](https://github.com/rust-lang/regex/issues/715): + Add `empty` constructor to `RegexSet` for convenience. + +Bug fixes: + +* [BUG #694](https://github.com/rust-lang/regex/issues/694): + Fix doc example for `Replacer::replace_append`. +* [BUG #698](https://github.com/rust-lang/regex/issues/698): + Clarify docs for `s` flag when using a `bytes::Regex`. +* [BUG #711](https://github.com/rust-lang/regex/issues/711): + Clarify `is_match` docs to indicate that it can match anywhere in string. + + 1.3.9 (2020-05-28) ================== This release fixes a MSRV (Minimum Support Rust Version) regression in the @@ -6,7 +47,7 @@ compile on other Rust versions, such as Rust 1.39. Bug fixes: -* [BUG #685](https://github.com/rust-lang/regex/issue/685): +* [BUG #685](https://github.com/rust-lang/regex/issues/685): Remove use of `doc_comment` crate, which cannot be used before Rust 1.43. @@ -22,9 +63,9 @@ Bug fixes: * [BUG #523](https://github.com/rust-lang/regex/pull/523): Add note to documentation that spaces can be escaped in `x` mode. -* [BUG #524](https://github.com/rust-lang/regex/issue/524): +* [BUG #524](https://github.com/rust-lang/regex/issues/524): Add support for empty sub-expressions, including empty alternations. -* [BUG #659](https://github.com/rust-lang/regex/issue/659): +* [BUG #659](https://github.com/rust-lang/regex/issues/659): Fix match bug caused by an empty sub-expression miscompilation. @@ -12,7 +12,7 @@ [package] name = "regex" -version = "1.3.9" +version = "1.4.1" authors = ["The Rust Project Developers"] exclude = ["/scripts/*", "/.github/*"] autotests = false @@ -80,7 +80,7 @@ version = "2.2.1" optional = true [dependencies.regex-syntax] -version = "0.6.18" +version = "0.6.20" default-features = false [dependencies.thread_local] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 1296ae0..fd238a9 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.3.9" #:version +version = "1.4.1" #:version authors = ["The Rust Project Developers"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -118,7 +118,7 @@ optional = true # For parsing regular expressions. [dependencies.regex-syntax] path = "regex-syntax" -version = "0.6.18" +version = "0.6.20" default-features = false [dev-dependencies] @@ -7,13 +7,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/regex/regex-1.3.9.crate" + value: "https://static.crates.io/crates/regex/regex-1.4.1.crate" } - version: "1.3.9" + version: "1.4.1" license_type: NOTICE last_upgrade_date { year: 2020 - month: 5 - day: 28 + month: 10 + day: 26 } } diff --git a/src/compile.rs b/src/compile.rs index ad54040..cdc583c 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -222,7 +222,7 @@ impl Compiler { /// hole /// ``` /// - /// To compile two expressions, e1 and e2, concatinated together we + /// To compile two expressions, e1 and e2, concatenated together we /// would do: /// /// ```ignore @@ -679,7 +679,7 @@ impl<'a> Fsm<'a> { } } else if next_si & STATE_START > 0 { // A start state isn't in the common case because we may - // what to do quick prefix scanning. If the program doesn't + // want to do quick prefix scanning. If the program doesn't // have a detected prefix, then start states are actually // considered common and this case is never reached. debug_assert!(self.has_prefix()); @@ -725,7 +725,7 @@ impl<'a> Fsm<'a> { } } - // Run the DFA once more on the special EOF senitnel value. + // Run the DFA once more on the special EOF sentinel value. // We don't care about the special bits in the state pointer any more, // so get rid of them. prev_si &= STATE_MAX; @@ -830,7 +830,7 @@ impl<'a> Fsm<'a> { } } - // Run the DFA once more on the special EOF senitnel value. + // Run the DFA once more on the special EOF sentinel value. prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { None => return Result::Quit, Some(STATE_DEAD) => return result.set_non_match(0), @@ -913,8 +913,8 @@ impl<'a> Fsm<'a> { if self.state(si).flags().has_empty() { // Compute the flags immediately preceding the current byte. // This means we only care about the "end" or "end line" flags. - // (The "start" flags are computed immediately proceding the - // current byte and is handled below.) + // (The "start" flags are computed immediately following the + // current byte and are handled below.) let mut flags = EmptyFlags::default(); if b.is_eof() { flags.end = true; @@ -1048,7 +1048,7 @@ impl<'a> Fsm<'a> { /// /// If matching starts after the beginning of the input, then only start /// line should be set if the preceding byte is `\n`. End line should never - /// be set in this case. (Even if the proceding byte is a `\n`, it will + /// be set in this case. (Even if the following byte is a `\n`, it will /// be handled in a subsequent DFA state.) fn follow_epsilons( &mut self, diff --git a/src/expand.rs b/src/expand.rs index 528f55e..fd2ab03 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -24,7 +24,7 @@ pub fn expand_str( continue; } debug_assert!(!replacement.is_empty()); - let cap_ref = match find_cap_ref(replacement) { + let cap_ref = match find_cap_ref(replacement.as_bytes()) { Some(cap_ref) => cap_ref, None => { dst.push_str("$"); @@ -125,19 +125,15 @@ impl From<usize> for Ref<'static> { /// starting at the beginning of `replacement`. /// /// If no such valid reference could be found, None is returned. -fn find_cap_ref<T: ?Sized + AsRef<[u8]>>( - replacement: &T, -) -> Option<CaptureRef> { +fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> { let mut i = 0; let rep: &[u8] = replacement.as_ref(); if rep.len() <= 1 || rep[0] != b'$' { return None; } - let mut brace = false; i += 1; if rep[i] == b'{' { - brace = true; - i += 1; + return find_cap_ref_braced(rep, i + 1); } let mut cap_end = i; while rep.get(cap_end).map_or(false, is_valid_cap_letter) { @@ -151,12 +147,6 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>( // check with either unsafe or by parsing the number straight from &[u8]. let cap = str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); - if brace { - if !rep.get(cap_end).map_or(false, |&b| b == b'}') { - return None; - } - cap_end += 1; - } Some(CaptureRef { cap: match cap.parse::<u32>() { Ok(i) => Ref::Number(i as usize), @@ -166,6 +156,31 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>( }) } +fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> { + let start = i; + while rep.get(i).map_or(false, |&b| b != b'}') { + i += 1; + } + if !rep.get(i).map_or(false, |&b| b == b'}') { + return None; + } + // When looking at braced names, we don't put any restrictions on the name, + // so it's possible it could be invalid UTF-8. But a capture group name + // can never be invalid UTF-8, so if we have invalid UTF-8, then we can + // safely return None. + let cap = match str::from_utf8(&rep[start..i]) { + Err(_) => return None, + Ok(cap) => cap, + }; + Some(CaptureRef { + cap: match cap.parse::<u32>() { + Ok(i) => Ref::Number(i as usize), + Err(_) => Ref::Named(cap), + }, + end: i + 1, + }) +} + /// Returns true if and only if the given byte is allowed in a capture name. fn is_valid_cap_letter(b: &u8) -> bool { match *b { @@ -182,13 +197,13 @@ mod tests { ($name:ident, $text:expr) => { #[test] fn $name() { - assert_eq!(None, find_cap_ref($text)); + assert_eq!(None, find_cap_ref($text.as_bytes())); } }; ($name:ident, $text:expr, $capref:expr) => { #[test] fn $name() { - assert_eq!(Some($capref), find_cap_ref($text)); + assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); } }; } @@ -204,7 +219,8 @@ mod tests { find!(find_cap_ref3, "$0", c!(0, 2)); find!(find_cap_ref4, "$5", c!(5, 2)); find!(find_cap_ref5, "$10", c!(10, 3)); - // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers + // See https://github.com/rust-lang/regex/pull/585 + // for more on characters following numbers find!(find_cap_ref6, "$42a", c!("42a", 4)); find!(find_cap_ref7, "${42}a", c!(42, 5)); find!(find_cap_ref8, "${42"); @@ -217,4 +233,6 @@ mod tests { find!(find_cap_ref15, "$1_$2", c!("1_", 3)); find!(find_cap_ref16, "$x-$y", c!("x", 2)); find!(find_cap_ref17, "$x_$y", c!("x_", 3)); + find!(find_cap_ref18, "${#}", c!("#", 4)); + find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); } @@ -365,7 +365,7 @@ $ the end of text (or end-of-line with multi-line mode) <pre class="rust"> (exp) numbered capture group (indexed by opening parenthesis) -(?P<name>exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z]) +(?P<name>exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]]) (?:exp) non-capturing group (?flags) set flags within current group (?flags:exp) set flags for exp (non-capturing) @@ -562,7 +562,7 @@ All features below are enabled by default. [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches). * **unicode-gencat** - Provide the data for - [Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). + [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). This includes, but is not limited to, `Decimal_Number`, `Letter`, `Math_Symbol`, `Number` and `Punctuation`. * **unicode-perl** - @@ -731,8 +731,8 @@ Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when enabled. -6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value. -When the `s` flag is enabled, `.` matches any byte. +6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the +`s` flag is additionally enabled, `.` matches any byte. # Performance diff --git a/src/pikevm.rs b/src/pikevm.rs index c106c76..299087d 100644 --- a/src/pikevm.rs +++ b/src/pikevm.rs @@ -8,7 +8,7 @@ // // It can do more than the DFA can (specifically, record capture locations // and execute Unicode word boundary assertions), but at a slower speed. -// Specifically, the Pike VM exectues a DFA implicitly by repeatedly expanding +// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding // epsilon transitions. That is, the Pike VM engine can be in multiple states // at once where as the DFA is only ever in one state at a time. // diff --git a/src/re_bytes.rs b/src/re_bytes.rs index 69f0b33..ca01e0e 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -119,7 +119,8 @@ impl Regex { RegexBuilder::new(re).build() } - /// Returns true if and only if the regex matches the string given. + /// Returns true if and only if there is a match for the regex in the + /// string given. /// /// It is recommended to use this method if all you need to do is test /// a match, since the underlying matching engine may be able to do less @@ -930,17 +931,22 @@ impl<'t> Captures<'t> { /// Expands all instances of `$name` in `replacement` to the corresponding /// capture group `name`, and writes them to the `dst` buffer given. /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the + /// `name` may be an integer corresponding to the index of the capture + /// group (counted by order of opening parenthesis where `0` is the /// entire match) or it can be a name (consisting of letters, digits or /// underscores) corresponding to a named capture group. /// /// If `name` isn't a valid capture group (whether the name doesn't exist /// or isn't a valid index), then it is replaced with the empty string. /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. + /// The longest possible name consisting of the characters `[_0-9A-Za-z]` + /// is used. e.g., `$1a` looks up the capture group named `1a` and not the + /// capture group at index `1`. To exert more precise control over the + /// name, or to refer to a capture group name that uses characters outside + /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When + /// using braces, any sequence of valid UTF-8 bytes is permitted. If the + /// sequence does not refer to a capture group name in the corresponding + /// regex, then it is replaced with an empty string. /// /// To write a literal `$` use `$$`. pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { @@ -1051,6 +1057,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> { /// /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and /// the lifetime `'t` corresponds to the originally matched text. +#[derive(Clone)] pub struct SubCaptureMatches<'c, 't: 'c> { caps: &'c Captures<'t>, it: SubCapturesPosIter<'c>, diff --git a/src/re_set.rs b/src/re_set.rs index fc2b61a..b8954be 100644 --- a/src/re_set.rs +++ b/src/re_set.rs @@ -96,6 +96,19 @@ impl RegexSet { RegexSetBuilder::new(exprs).build() } + /// Create a new empty regex set. + /// + /// # Example + /// + /// ```rust + /// # use regex::RegexSet; + /// let set = RegexSet::empty(); + /// assert!(set.is_empty()); + /// ``` + pub fn empty() -> RegexSet { + RegexSetBuilder::new(&[""; 0]).build().unwrap() + } + /// Returns true if and only if one of the regexes in this set matches /// the text given. /// @@ -207,6 +220,11 @@ impl RegexSet { self.0.regex_strings().len() } + /// Returns `true` if this set contains no regular expressions. + pub fn is_empty(&self) -> bool { + self.0.regex_strings().is_empty() + } + /// Returns the patterns that this set will match on. /// /// This function can be used to determine the pattern for a match. The diff --git a/src/re_trait.rs b/src/re_trait.rs index b56804e..d14a9f7 100644 --- a/src/re_trait.rs +++ b/src/re_trait.rs @@ -51,6 +51,7 @@ impl Locations { /// Positions are byte indices in terms of the original string matched. /// /// `'c` is the lifetime of the captures. +#[derive(Clone)] pub struct SubCapturesPosIter<'c> { idx: usize, locs: &'c Locations, diff --git a/src/re_unicode.rs b/src/re_unicode.rs index b746599..ea95c1b 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -175,7 +175,8 @@ impl Regex { RegexBuilder::new(re).build() } - /// Returns true if and only if the regex matches the string given. + /// Returns true if and only if there is a match for the regex in the + /// string given. /// /// It is recommended to use this method if all you need to do is test /// a match, since the underlying matching engine may be able to do less @@ -947,17 +948,22 @@ impl<'t> Captures<'t> { /// Expands all instances of `$name` in `replacement` to the corresponding /// capture group `name`, and writes them to the `dst` buffer given. /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the + /// `name` may be an integer corresponding to the index of the capture + /// group (counted by order of opening parenthesis where `0` is the /// entire match) or it can be a name (consisting of letters, digits or /// underscores) corresponding to a named capture group. /// /// If `name` isn't a valid capture group (whether the name doesn't exist /// or isn't a valid index), then it is replaced with the empty string. /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. + /// The longest possible name consisting of the characters `[_0-9A-Za-z]` + /// is used. e.g., `$1a` looks up the capture group named `1a` and not the + /// capture group at index `1`. To exert more precise control over the + /// name, or to refer to a capture group name that uses characters outside + /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When + /// using braces, any sequence of characters is permitted. If the sequence + /// does not refer to a capture group name in the corresponding regex, then + /// it is replaced with an empty string. /// /// To write a literal `$` use `$$`. pub fn expand(&self, replacement: &str, dst: &mut String) { @@ -1053,6 +1059,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> { /// /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and /// the lifetime `'t` corresponds to the originally matched text. +#[derive(Clone)] pub struct SubCaptureMatches<'c, 't: 'c> { caps: &'c Captures<'t>, it: SubCapturesPosIter<'c>, @@ -1122,7 +1129,7 @@ pub trait Replacer { /// have a match at capture group `0`. /// /// For example, a no-op replacement would be - /// `dst.extend(caps.get(0).unwrap().as_str())`. + /// `dst.push_str(caps.get(0).unwrap().as_str())`. fn replace_append(&mut self, caps: &Captures, dst: &mut String); /// Return a fixed unchanging replacement string. diff --git a/tests/api.rs b/tests/api.rs index 0d4962c..c7250a8 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -195,6 +195,18 @@ expand!( ); expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", ""); +expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%"); +expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc["); +expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{"); +expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}"); +expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%"); +expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%"); +expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc["); +expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "["); +expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "["); +expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "["); +expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "["); + split!( split1, r"(?-u)\s+", diff --git a/tests/set.rs b/tests/set.rs index 648feec..37fcf87 100644 --- a/tests/set.rs +++ b/tests/set.rs @@ -54,3 +54,14 @@ fn get_set_patterns() { let set = regex_set!(&["a", "b"]); assert_eq!(vec!["a", "b"], set.patterns()); } + +#[test] +fn len_and_empty() { + let empty = regex_set!(&[""; 0]); + assert_eq!(empty.len(), 0); + assert!(empty.is_empty()); + + let not_empty = regex_set!(&["ab", "b"]); + assert_eq!(not_empty.len(), 2); + assert!(!not_empty.is_empty()); +} diff --git a/tests/unicode.rs b/tests/unicode.rs index 52522f4..9f1cd0c 100644 --- a/tests/unicode.rs +++ b/tests/unicode.rs @@ -74,6 +74,9 @@ mat!( Some((0, 3)) ); mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4))); +// See: https://github.com/rust-lang/regex/issues/719 +mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4))); +mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4))); mat!( uni_class_gencat_initial_punctuation, r"\p{Initial_Punctuation}", |