Upgrade rust/crates/regex to 1.4.1 am: 849e4457e7 am: 7767e43fa4 am: 4ba5657398

Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/regex/+/1474897 Change-Id: I21593a96bd772c83839151aa230434c2ff6c168e
author: Chih-Hung Hsieh <chh@google.com> 2020-10-26 23:34:49 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2020-10-26 23:34:49 +0000
commit: 910574851b504a9aff6861089887f234df2bdf88 (patch)
tree: e873b7eb9bb377a84845126bfbbbc5301481d3bc
parent: ff6b1ad934393f85da3f7edc647555ccbec0205d (diff)
parent: 4ba5657398b12fa17ad53e750a42a9b67c2de8af (diff)
download: regex-910574851b504a9aff6861089887f234df2bdf88.tar.gz
18 files changed, 173 insertions, 55 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index bd8a005..407f0d7 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "691606773f525be32a59a0c28eae203a79663706"
+    "sha1": "d5bf98f293b48174d5378471d01c2e0ef271bbbc"
   }
 }
diff --git a/Android.bp b/Android.bp
index d122541..f2e1727 100644
--- a/Android.bp
+++ b/Android.bp
@@ -34,8 +34,8 @@ rust_library_host {
 }
 
 // dependent_library ["feature_list"]
-//   aho-corasick-0.7.13 "default,std"
+//   aho-corasick-0.7.14 "default,std"
 //   lazy_static-1.4.0
 //   memchr-2.3.3 "default,std,use_std"
-//   regex-syntax-0.6.18 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
+//   regex-syntax-0.6.20 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
 //   thread_local-1.0.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7e528d..82aa089 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,44 @@
+1.4.1 (2020-10-13)
+==================
+This is a small bug fix release that makes `\p{cf}` work. Previously, it would
+report "property not found" even though `cf` is a valid abbreviation for the
+`Format` general category.
+
+* [BUG #719](https://github.com/rust-lang/regex/issues/719):
+  Fixes bug that prevented `\p{cf}` from working.
+
+
+1.4.0 (2020-10-11)
+==================
+This release has a few minor documentation fixes as well as some very minor
+API additions. The MSRV remains at Rust 1.28 for now, but this is intended to
+increase to at least Rust 1.41.1 soon.
+
+This release also adds support for OSS-Fuzz. Kudos to
+[@DavidKorczynski](https://github.com/DavidKorczynski)
+for doing the heavy lifting for that!
+
+New features:
+
+* [FEATURE #649](https://github.com/rust-lang/regex/issues/649):
+  Support `[`, `]` and `.` in capture group names.
+* [FEATURE #687](https://github.com/rust-lang/regex/issues/687):
+  Add `is_empty` predicate to `RegexSet`.
+* [FEATURE #689](https://github.com/rust-lang/regex/issues/689):
+  Implement `Clone` for `SubCaptureMatches`.
+* [FEATURE #715](https://github.com/rust-lang/regex/issues/715):
+  Add `empty` constructor to `RegexSet` for convenience.
+
+Bug fixes:
+
+* [BUG #694](https://github.com/rust-lang/regex/issues/694):
+  Fix doc example for `Replacer::replace_append`.
+* [BUG #698](https://github.com/rust-lang/regex/issues/698):
+  Clarify docs for `s` flag when using a `bytes::Regex`.
+* [BUG #711](https://github.com/rust-lang/regex/issues/711):
+  Clarify `is_match` docs to indicate that it can match anywhere in string.
+
+
 1.3.9 (2020-05-28)
 ==================
 This release fixes a MSRV (Minimum Support Rust Version) regression in the
@@ -6,7 +47,7 @@ compile on other Rust versions, such as Rust 1.39.
 
 Bug fixes:
 
-* [BUG #685](https://github.com/rust-lang/regex/issue/685):
+* [BUG #685](https://github.com/rust-lang/regex/issues/685):
   Remove use of `doc_comment` crate, which cannot be used before Rust 1.43.
 
 
@@ -22,9 +63,9 @@ Bug fixes:
 
 * [BUG #523](https://github.com/rust-lang/regex/pull/523):
   Add note to documentation that spaces can be escaped in `x` mode.
-* [BUG #524](https://github.com/rust-lang/regex/issue/524):
+* [BUG #524](https://github.com/rust-lang/regex/issues/524):
   Add support for empty sub-expressions, including empty alternations.
-* [BUG #659](https://github.com/rust-lang/regex/issue/659):
+* [BUG #659](https://github.com/rust-lang/regex/issues/659):
   Fix match bug caused by an empty sub-expression miscompilation.
 
 
diff --git a/Cargo.toml b/Cargo.toml
index 02caabb..bc455bc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@
 
 [package]
 name = "regex"
-version = "1.3.9"
+version = "1.4.1"
 authors = ["The Rust Project Developers"]
 exclude = ["/scripts/*", "/.github/*"]
 autotests = false
@@ -80,7 +80,7 @@ version = "2.2.1"
 optional = true
 
 [dependencies.regex-syntax]
-version = "0.6.18"
+version = "0.6.20"
 default-features = false
 
 [dependencies.thread_local]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 1296ae0..fd238a9 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
 [package]
 name = "regex"
-version = "1.3.9"  #:version
+version = "1.4.1"  #:version
 authors = ["The Rust Project Developers"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@@ -118,7 +118,7 @@ optional = true
 # For parsing regular expressions.
 [dependencies.regex-syntax]
 path = "regex-syntax"
-version = "0.6.18"
+version = "0.6.20"
 default-features = false
 
 [dev-dependencies]
diff --git a/METADATA b/METADATA
index 644cabf..86023f9 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/regex/regex-1.3.9.crate"
+    value: "https://static.crates.io/crates/regex/regex-1.4.1.crate"
   }
-  version: "1.3.9"
+  version: "1.4.1"
   license_type: NOTICE
   last_upgrade_date {
     year: 2020
-    month: 5
-    day: 28
+    month: 10
+    day: 26
   }
 }
diff --git a/src/compile.rs b/src/compile.rs
index ad54040..cdc583c 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -222,7 +222,7 @@ impl Compiler {
     ///                                         hole
     /// ```
     ///
-    /// To compile two expressions, e1 and e2, concatinated together we
+    /// To compile two expressions, e1 and e2, concatenated together we
     /// would do:
     ///
     /// ```ignore
diff --git a/src/dfa.rs b/src/dfa.rs
index decc3b9..2a365ee 100644
--- a/src/dfa.rs
+++ b/src/dfa.rs
@@ -679,7 +679,7 @@ impl<'a> Fsm<'a> {
                 }
             } else if next_si & STATE_START > 0 {
                 // A start state isn't in the common case because we may
-                // what to do quick prefix scanning. If the program doesn't
+                // want to do quick prefix scanning. If the program doesn't
                 // have a detected prefix, then start states are actually
                 // considered common and this case is never reached.
                 debug_assert!(self.has_prefix());
@@ -725,7 +725,7 @@ impl<'a> Fsm<'a> {
             }
         }
 
-        // Run the DFA once more on the special EOF senitnel value.
+        // Run the DFA once more on the special EOF sentinel value.
         // We don't care about the special bits in the state pointer any more,
         // so get rid of them.
         prev_si &= STATE_MAX;
@@ -830,7 +830,7 @@ impl<'a> Fsm<'a> {
             }
         }
 
-        // Run the DFA once more on the special EOF senitnel value.
+        // Run the DFA once more on the special EOF sentinel value.
         prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
             None => return Result::Quit,
             Some(STATE_DEAD) => return result.set_non_match(0),
@@ -913,8 +913,8 @@ impl<'a> Fsm<'a> {
         if self.state(si).flags().has_empty() {
             // Compute the flags immediately preceding the current byte.
             // This means we only care about the "end" or "end line" flags.
-            // (The "start" flags are computed immediately proceding the
-            // current byte and is handled below.)
+            // (The "start" flags are computed immediately following the
+            // current byte and are handled below.)
             let mut flags = EmptyFlags::default();
             if b.is_eof() {
                 flags.end = true;
@@ -1048,7 +1048,7 @@ impl<'a> Fsm<'a> {
     ///
     /// If matching starts after the beginning of the input, then only start
     /// line should be set if the preceding byte is `\n`. End line should never
-    /// be set in this case. (Even if the proceding byte is a `\n`, it will
+    /// be set in this case. (Even if the following byte is a `\n`, it will
     /// be handled in a subsequent DFA state.)
     fn follow_epsilons(
         &mut self,
diff --git a/src/expand.rs b/src/expand.rs
index 528f55e..fd2ab03 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -24,7 +24,7 @@ pub fn expand_str(
             continue;
         }
         debug_assert!(!replacement.is_empty());
-        let cap_ref = match find_cap_ref(replacement) {
+        let cap_ref = match find_cap_ref(replacement.as_bytes()) {
             Some(cap_ref) => cap_ref,
             None => {
                 dst.push_str("$");
@@ -125,19 +125,15 @@ impl From<usize> for Ref<'static> {
 /// starting at the beginning of `replacement`.
 ///
 /// If no such valid reference could be found, None is returned.
-fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
-    replacement: &T,
-) -> Option<CaptureRef> {
+fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
     let mut i = 0;
     let rep: &[u8] = replacement.as_ref();
     if rep.len() <= 1 || rep[0] != b'$' {
         return None;
     }
-    let mut brace = false;
     i += 1;
     if rep[i] == b'{' {
-        brace = true;
-        i += 1;
+        return find_cap_ref_braced(rep, i + 1);
     }
     let mut cap_end = i;
     while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
@@ -151,12 +147,6 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
     // check with either unsafe or by parsing the number straight from &[u8].
     let cap =
         str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
-    if brace {
-        if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
-            return None;
-        }
-        cap_end += 1;
-    }
     Some(CaptureRef {
         cap: match cap.parse::<u32>() {
             Ok(i) => Ref::Number(i as usize),
@@ -166,6 +156,31 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
     })
 }
 
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> {
+    let start = i;
+    while rep.get(i).map_or(false, |&b| b != b'}') {
+        i += 1;
+    }
+    if !rep.get(i).map_or(false, |&b| b == b'}') {
+        return None;
+    }
+    // When looking at braced names, we don't put any restrictions on the name,
+    // so it's possible it could be invalid UTF-8. But a capture group name
+    // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
+    // safely return None.
+    let cap = match str::from_utf8(&rep[start..i]) {
+        Err(_) => return None,
+        Ok(cap) => cap,
+    };
+    Some(CaptureRef {
+        cap: match cap.parse::<u32>() {
+            Ok(i) => Ref::Number(i as usize),
+            Err(_) => Ref::Named(cap),
+        },
+        end: i + 1,
+    })
+}
+
 /// Returns true if and only if the given byte is allowed in a capture name.
 fn is_valid_cap_letter(b: &u8) -> bool {
     match *b {
@@ -182,13 +197,13 @@ mod tests {
         ($name:ident, $text:expr) => {
             #[test]
             fn $name() {
-                assert_eq!(None, find_cap_ref($text));
+                assert_eq!(None, find_cap_ref($text.as_bytes()));
             }
         };
         ($name:ident, $text:expr, $capref:expr) => {
             #[test]
             fn $name() {
-                assert_eq!(Some($capref), find_cap_ref($text));
+                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
             }
         };
     }
@@ -204,7 +219,8 @@ mod tests {
     find!(find_cap_ref3, "$0", c!(0, 2));
     find!(find_cap_ref4, "$5", c!(5, 2));
     find!(find_cap_ref5, "$10", c!(10, 3));
-    // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers
+    // See https://github.com/rust-lang/regex/pull/585
+    // for more on characters following numbers
     find!(find_cap_ref6, "$42a", c!("42a", 4));
     find!(find_cap_ref7, "${42}a", c!(42, 5));
     find!(find_cap_ref8, "${42");
@@ -217,4 +233,6 @@ mod tests {
     find!(find_cap_ref15, "$1_$2", c!("1_", 3));
     find!(find_cap_ref16, "$x-$y", c!("x", 2));
     find!(find_cap_ref17, "$x_$y", c!("x_", 3));
+    find!(find_cap_ref18, "${#}", c!("#", 4));
+    find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
 }
diff --git a/src/lib.rs b/src/lib.rs
index e0a0975..bdcebd4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -365,7 +365,7 @@ $     the end of text (or end-of-line with multi-line mode)
 
 <pre class="rust">
 (exp)          numbered capture group (indexed by opening parenthesis)
-(?P&lt;name&gt;exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
+(?P&lt;name&gt;exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
 (?:exp)        non-capturing group
 (?flags)       set flags within current group
 (?flags:exp)   set flags for exp (non-capturing)
@@ -562,7 +562,7 @@ All features below are enabled by default.
   [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
 * **unicode-gencat** -
   Provide the data for
-  [Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+  [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
   This includes, but is not limited to, `Decimal_Number`, `Letter`,
   `Math_Symbol`, `Number` and `Punctuation`.
 * **unicode-perl** -
@@ -731,8 +731,8 @@ Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
 literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that
 matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when
 enabled.
-6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value.
-When the `s` flag is enabled, `.` matches any byte.
+6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
+`s` flag is additionally enabled, `.` matches any byte.
 
 # Performance
 
diff --git a/src/pikevm.rs b/src/pikevm.rs
index c106c76..299087d 100644
--- a/src/pikevm.rs
+++ b/src/pikevm.rs
@@ -8,7 +8,7 @@
 //
 // It can do more than the DFA can (specifically, record capture locations
 // and execute Unicode word boundary assertions), but at a slower speed.
-// Specifically, the Pike VM exectues a DFA implicitly by repeatedly expanding
+// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding
 // epsilon transitions. That is, the Pike VM engine can be in multiple states
 // at once where as the DFA is only ever in one state at a time.
 //
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index 69f0b33..ca01e0e 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -119,7 +119,8 @@ impl Regex {
         RegexBuilder::new(re).build()
     }
 
-    /// Returns true if and only if the regex matches the string given.
+    /// Returns true if and only if there is a match for the regex in the
+    /// string given.
     ///
     /// It is recommended to use this method if all you need to do is test
     /// a match, since the underlying matching engine may be able to do less
@@ -930,17 +931,22 @@ impl<'t> Captures<'t> {
     /// Expands all instances of `$name` in `replacement` to the corresponding
     /// capture group `name`, and writes them to the `dst` buffer given.
     ///
-    /// `name` may be an integer corresponding to the index of the
-    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// `name` may be an integer corresponding to the index of the capture
+    /// group (counted by order of opening parenthesis where `0` is the
     /// entire match) or it can be a name (consisting of letters, digits or
     /// underscores) corresponding to a named capture group.
     ///
     /// If `name` isn't a valid capture group (whether the name doesn't exist
     /// or isn't a valid index), then it is replaced with the empty string.
     ///
-    /// The longest possible name is used. e.g., `$1a` looks up the capture
-    /// group named `1a` and not the capture group at index `1`. To exert more
-    /// precise control over the name, use braces, e.g., `${1}a`.
+    /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+    /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+    /// capture group at index `1`. To exert more precise control over the
+    /// name, or to refer to a capture group name that uses characters outside
+    /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+    /// using braces, any sequence of valid UTF-8 bytes is permitted. If the
+    /// sequence does not refer to a capture group name in the corresponding
+    /// regex, then it is replaced with an empty string.
     ///
     /// To write a literal `$` use `$$`.
     pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
@@ -1051,6 +1057,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
 ///
 /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
 /// the lifetime `'t` corresponds to the originally matched text.
+#[derive(Clone)]
 pub struct SubCaptureMatches<'c, 't: 'c> {
     caps: &'c Captures<'t>,
     it: SubCapturesPosIter<'c>,
diff --git a/src/re_set.rs b/src/re_set.rs
index fc2b61a..b8954be 100644
--- a/src/re_set.rs
+++ b/src/re_set.rs
@@ -96,6 +96,19 @@ impl RegexSet {
         RegexSetBuilder::new(exprs).build()
     }
 
+    /// Create a new empty regex set.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use regex::RegexSet;
+    /// let set = RegexSet::empty();
+    /// assert!(set.is_empty());
+    /// ```
+    pub fn empty() -> RegexSet {
+        RegexSetBuilder::new(&[""; 0]).build().unwrap()
+    }
+
     /// Returns true if and only if one of the regexes in this set matches
     /// the text given.
     ///
@@ -207,6 +220,11 @@ impl RegexSet {
         self.0.regex_strings().len()
     }
 
+    /// Returns `true` if this set contains no regular expressions.
+    pub fn is_empty(&self) -> bool {
+        self.0.regex_strings().is_empty()
+    }
+
     /// Returns the patterns that this set will match on.
     ///
     /// This function can be used to determine the pattern for a match. The
diff --git a/src/re_trait.rs b/src/re_trait.rs
index b56804e..d14a9f7 100644
--- a/src/re_trait.rs
+++ b/src/re_trait.rs
@@ -51,6 +51,7 @@ impl Locations {
 /// Positions are byte indices in terms of the original string matched.
 ///
 /// `'c` is the lifetime of the captures.
+#[derive(Clone)]
 pub struct SubCapturesPosIter<'c> {
     idx: usize,
     locs: &'c Locations,
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index b746599..ea95c1b 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -175,7 +175,8 @@ impl Regex {
         RegexBuilder::new(re).build()
     }
 
-    /// Returns true if and only if the regex matches the string given.
+    /// Returns true if and only if there is a match for the regex in the
+    /// string given.
     ///
     /// It is recommended to use this method if all you need to do is test
     /// a match, since the underlying matching engine may be able to do less
@@ -947,17 +948,22 @@ impl<'t> Captures<'t> {
     /// Expands all instances of `$name` in `replacement` to the corresponding
     /// capture group `name`, and writes them to the `dst` buffer given.
     ///
-    /// `name` may be an integer corresponding to the index of the
-    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// `name` may be an integer corresponding to the index of the capture
+    /// group (counted by order of opening parenthesis where `0` is the
     /// entire match) or it can be a name (consisting of letters, digits or
     /// underscores) corresponding to a named capture group.
     ///
     /// If `name` isn't a valid capture group (whether the name doesn't exist
     /// or isn't a valid index), then it is replaced with the empty string.
     ///
-    /// The longest possible name is used. e.g., `$1a` looks up the capture
-    /// group named `1a` and not the capture group at index `1`. To exert more
-    /// precise control over the name, use braces, e.g., `${1}a`.
+    /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+    /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+    /// capture group at index `1`. To exert more precise control over the
+    /// name, or to refer to a capture group name that uses characters outside
+    /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+    /// using braces, any sequence of characters is permitted. If the sequence
+    /// does not refer to a capture group name in the corresponding regex, then
+    /// it is replaced with an empty string.
     ///
     /// To write a literal `$` use `$$`.
     pub fn expand(&self, replacement: &str, dst: &mut String) {
@@ -1053,6 +1059,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
 ///
 /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
 /// the lifetime `'t` corresponds to the originally matched text.
+#[derive(Clone)]
 pub struct SubCaptureMatches<'c, 't: 'c> {
     caps: &'c Captures<'t>,
     it: SubCapturesPosIter<'c>,
@@ -1122,7 +1129,7 @@ pub trait Replacer {
     /// have a match at capture group `0`.
     ///
     /// For example, a no-op replacement would be
-    /// `dst.extend(caps.get(0).unwrap().as_str())`.
+    /// `dst.push_str(caps.get(0).unwrap().as_str())`.
     fn replace_append(&mut self, caps: &Captures, dst: &mut String);
 
     /// Return a fixed unchanging replacement string.
diff --git a/tests/api.rs b/tests/api.rs
index 0d4962c..c7250a8 100644
--- a/tests/api.rs
+++ b/tests/api.rs
@@ -195,6 +195,18 @@ expand!(
 );
 expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", "");
 
+expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%");
+expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc[");
+expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{");
+expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}");
+expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%");
+expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%");
+expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc[");
+expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "[");
+expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "[");
+expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "[");
+expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "[");
+
 split!(
     split1,
     r"(?-u)\s+",
diff --git a/tests/set.rs b/tests/set.rs
index 648feec..37fcf87 100644
--- a/tests/set.rs
+++ b/tests/set.rs
@@ -54,3 +54,14 @@ fn get_set_patterns() {
     let set = regex_set!(&["a", "b"]);
     assert_eq!(vec!["a", "b"], set.patterns());
 }
+
+#[test]
+fn len_and_empty() {
+    let empty = regex_set!(&[""; 0]);
+    assert_eq!(empty.len(), 0);
+    assert!(empty.is_empty());
+
+    let not_empty = regex_set!(&["ab", "b"]);
+    assert_eq!(not_empty.len(), 2);
+    assert!(!not_empty.is_empty());
+}
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 52522f4..9f1cd0c 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -74,6 +74,9 @@ mat!(
     Some((0, 3))
 );
 mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
+// See: https://github.com/rust-lang/regex/issues/719
+mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
 mat!(
     uni_class_gencat_initial_punctuation,
     r"\p{Initial_Punctuation}",
author	Chih-Hung Hsieh <chh@google.com>	2020-10-26 23:34:49 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	2020-10-26 23:34:49 +0000
commit	910574851b504a9aff6861089887f234df2bdf88 (patch)
tree	e873b7eb9bb377a84845126bfbbbc5301481d3bc
parent	ff6b1ad934393f85da3f7edc647555ccbec0205d (diff)
parent	4ba5657398b12fa17ad53e750a42a9b67c2de8af (diff)
download	regex-910574851b504a9aff6861089887f234df2bdf88.tar.gz