aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chih-Hung Hsieh <chh@google.com> 2020-10-27 00:13:35 +0000
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2020-10-27 00:13:35 +0000
commit 7b9b5e476d12a68300b7ce2bc27d1f0d2f2fdcf0 (patch)
tree e873b7eb9bb377a84845126bfbbbc5301481d3bc
parent 033eb5ccb02ce1b47cdc746cf4ceaef3c01335a2 (diff)
parent 910574851b504a9aff6861089887f234df2bdf88 (diff)
download regex-7b9b5e476d12a68300b7ce2bc27d1f0d2f2fdcf0.tar.gz
Upgrade rust/crates/regex to 1.4.1 am: 849e4457e7 am: 7767e43fa4 am: 4ba5657398 am: 910574851b
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/regex/+/1474897
Change-Id: If9e3296d497e667f21ed8942c9ec14232adc4717
-rw-r--r-- .cargo_vcs_info.json 2
-rw-r--r-- Android.bp 4
-rw-r--r-- CHANGELOG.md 47
-rw-r--r-- Cargo.toml 4
-rw-r--r-- Cargo.toml.orig 4
-rw-r--r-- METADATA 8
-rw-r--r-- src/compile.rs 2
-rw-r--r-- src/dfa.rs 12
-rw-r--r-- src/expand.rs 50
-rw-r--r-- src/lib.rs 8
-rw-r--r-- src/pikevm.rs 2
-rw-r--r-- src/re_bytes.rs 19
-rw-r--r-- src/re_set.rs 18
-rw-r--r-- src/re_trait.rs 1
-rw-r--r-- src/re_unicode.rs 21
-rw-r--r-- tests/api.rs 12
-rw-r--r-- tests/set.rs 11
-rw-r--r-- tests/unicode.rs 3
18 files changed, 173 insertions, 55 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index bd8a005..407f0d7 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "691606773f525be32a59a0c28eae203a79663706"
+ "sha1": "d5bf98f293b48174d5378471d01c2e0ef271bbbc"
}
}
diff --git a/Android.bp b/Android.bp
index d122541..f2e1727 100644
--- a/Android.bp
+++ b/Android.bp
@@ -34,8 +34,8 @@ rust_library_host {
}
// dependent_library ["feature_list"]
-// aho-corasick-0.7.13 "default,std"
+// aho-corasick-0.7.14 "default,std"
// lazy_static-1.4.0
// memchr-2.3.3 "default,std,use_std"
-// regex-syntax-0.6.18 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
+// regex-syntax-0.6.20 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
// thread_local-1.0.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7e528d..82aa089 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,44 @@
+1.4.1 (2020-10-13)
+==================
+This is a small bug fix release that makes `\p{cf}` work. Previously, it would
+report "property not found" even though `cf` is a valid abbreviation for the
+`Format` general category.
+
+* [BUG #719](https://github.com/rust-lang/regex/issues/719):
+ Fixes bug that prevented `\p{cf}` from working.
+
+
+1.4.0 (2020-10-11)
+==================
+This release has a few minor documentation fixes as well as some very minor
+API additions. The MSRV remains at Rust 1.28 for now, but this is intended to
+increase to at least Rust 1.41.1 soon.
+
+This release also adds support for OSS-Fuzz. Kudos to
+[@DavidKorczynski](https://github.com/DavidKorczynski)
+for doing the heavy lifting for that!
+
+New features:
+
+* [FEATURE #649](https://github.com/rust-lang/regex/issues/649):
+ Support `[`, `]` and `.` in capture group names.
+* [FEATURE #687](https://github.com/rust-lang/regex/issues/687):
+ Add `is_empty` predicate to `RegexSet`.
+* [FEATURE #689](https://github.com/rust-lang/regex/issues/689):
+ Implement `Clone` for `SubCaptureMatches`.
+* [FEATURE #715](https://github.com/rust-lang/regex/issues/715):
+ Add `empty` constructor to `RegexSet` for convenience.
+
+Bug fixes:
+
+* [BUG #694](https://github.com/rust-lang/regex/issues/694):
+ Fix doc example for `Replacer::replace_append`.
+* [BUG #698](https://github.com/rust-lang/regex/issues/698):
+ Clarify docs for `s` flag when using a `bytes::Regex`.
+* [BUG #711](https://github.com/rust-lang/regex/issues/711):
+ Clarify `is_match` docs to indicate that it can match anywhere in string.
+
+
1.3.9 (2020-05-28)
==================
This release fixes an MSRV (Minimum Supported Rust Version) regression in the
@@ -6,7 +47,7 @@ compile on other Rust versions, such as Rust 1.39.
Bug fixes:
-* [BUG #685](https://github.com/rust-lang/regex/issue/685):
+* [BUG #685](https://github.com/rust-lang/regex/issues/685):
Remove use of `doc_comment` crate, which cannot be used before Rust 1.43.
@@ -22,9 +63,9 @@ Bug fixes:
* [BUG #523](https://github.com/rust-lang/regex/pull/523):
Add note to documentation that spaces can be escaped in `x` mode.
-* [BUG #524](https://github.com/rust-lang/regex/issue/524):
+* [BUG #524](https://github.com/rust-lang/regex/issues/524):
Add support for empty sub-expressions, including empty alternations.
-* [BUG #659](https://github.com/rust-lang/regex/issue/659):
+* [BUG #659](https://github.com/rust-lang/regex/issues/659):
Fix match bug caused by an empty sub-expression miscompilation.
diff --git a/Cargo.toml b/Cargo.toml
index 02caabb..bc455bc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@
[package]
name = "regex"
-version = "1.3.9"
+version = "1.4.1"
authors = ["The Rust Project Developers"]
exclude = ["/scripts/*", "/.github/*"]
autotests = false
@@ -80,7 +80,7 @@ version = "2.2.1"
optional = true
[dependencies.regex-syntax]
-version = "0.6.18"
+version = "0.6.20"
default-features = false
[dependencies.thread_local]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 1296ae0..fd238a9 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "regex"
-version = "1.3.9" #:version
+version = "1.4.1" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@@ -118,7 +118,7 @@ optional = true
# For parsing regular expressions.
[dependencies.regex-syntax]
path = "regex-syntax"
-version = "0.6.18"
+version = "0.6.20"
default-features = false
[dev-dependencies]
diff --git a/METADATA b/METADATA
index 644cabf..86023f9 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/regex/regex-1.3.9.crate"
+ value: "https://static.crates.io/crates/regex/regex-1.4.1.crate"
}
- version: "1.3.9"
+ version: "1.4.1"
license_type: NOTICE
last_upgrade_date {
year: 2020
- month: 5
- day: 28
+ month: 10
+ day: 26
}
}
diff --git a/src/compile.rs b/src/compile.rs
index ad54040..cdc583c 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -222,7 +222,7 @@ impl Compiler {
/// hole
/// ```
///
- /// To compile two expressions, e1 and e2, concatinated together we
+ /// To compile two expressions, e1 and e2, concatenated together we
/// would do:
///
/// ```ignore
diff --git a/src/dfa.rs b/src/dfa.rs
index decc3b9..2a365ee 100644
--- a/src/dfa.rs
+++ b/src/dfa.rs
@@ -679,7 +679,7 @@ impl<'a> Fsm<'a> {
}
} else if next_si & STATE_START > 0 {
// A start state isn't in the common case because we may
- // what to do quick prefix scanning. If the program doesn't
+ // want to do quick prefix scanning. If the program doesn't
// have a detected prefix, then start states are actually
// considered common and this case is never reached.
debug_assert!(self.has_prefix());
@@ -725,7 +725,7 @@ impl<'a> Fsm<'a> {
}
}
- // Run the DFA once more on the special EOF senitnel value.
+ // Run the DFA once more on the special EOF sentinel value.
// We don't care about the special bits in the state pointer any more,
// so get rid of them.
prev_si &= STATE_MAX;
@@ -830,7 +830,7 @@ impl<'a> Fsm<'a> {
}
}
- // Run the DFA once more on the special EOF senitnel value.
+ // Run the DFA once more on the special EOF sentinel value.
prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
None => return Result::Quit,
Some(STATE_DEAD) => return result.set_non_match(0),
@@ -913,8 +913,8 @@ impl<'a> Fsm<'a> {
if self.state(si).flags().has_empty() {
// Compute the flags immediately preceding the current byte.
// This means we only care about the "end" or "end line" flags.
- // (The "start" flags are computed immediately proceding the
- // current byte and is handled below.)
+ // (The "start" flags are computed immediately following the
+ // current byte and are handled below.)
let mut flags = EmptyFlags::default();
if b.is_eof() {
flags.end = true;
@@ -1048,7 +1048,7 @@ impl<'a> Fsm<'a> {
///
/// If matching starts after the beginning of the input, then only start
/// line should be set if the preceding byte is `\n`. End line should never
- /// be set in this case. (Even if the proceding byte is a `\n`, it will
+ /// be set in this case. (Even if the following byte is a `\n`, it will
/// be handled in a subsequent DFA state.)
fn follow_epsilons(
&mut self,
diff --git a/src/expand.rs b/src/expand.rs
index 528f55e..fd2ab03 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -24,7 +24,7 @@ pub fn expand_str(
continue;
}
debug_assert!(!replacement.is_empty());
- let cap_ref = match find_cap_ref(replacement) {
+ let cap_ref = match find_cap_ref(replacement.as_bytes()) {
Some(cap_ref) => cap_ref,
None => {
dst.push_str("$");
@@ -125,19 +125,15 @@ impl From<usize> for Ref<'static> {
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
-fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
- replacement: &T,
-) -> Option<CaptureRef> {
+fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
let mut i = 0;
let rep: &[u8] = replacement.as_ref();
if rep.len() <= 1 || rep[0] != b'$' {
return None;
}
- let mut brace = false;
i += 1;
if rep[i] == b'{' {
- brace = true;
- i += 1;
+ return find_cap_ref_braced(rep, i + 1);
}
let mut cap_end = i;
while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
@@ -151,12 +147,6 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
// check with either unsafe or by parsing the number straight from &[u8].
let cap =
str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
- if brace {
- if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
- return None;
- }
- cap_end += 1;
- }
Some(CaptureRef {
cap: match cap.parse::<u32>() {
Ok(i) => Ref::Number(i as usize),
@@ -166,6 +156,31 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
})
}
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> {
+ let start = i;
+ while rep.get(i).map_or(false, |&b| b != b'}') {
+ i += 1;
+ }
+ if !rep.get(i).map_or(false, |&b| b == b'}') {
+ return None;
+ }
+ // When looking at braced names, we don't put any restrictions on the name,
+ // so it's possible it could be invalid UTF-8. But a capture group name
+ // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
+ // safely return None.
+ let cap = match str::from_utf8(&rep[start..i]) {
+ Err(_) => return None,
+ Ok(cap) => cap,
+ };
+ Some(CaptureRef {
+ cap: match cap.parse::<u32>() {
+ Ok(i) => Ref::Number(i as usize),
+ Err(_) => Ref::Named(cap),
+ },
+ end: i + 1,
+ })
+}
+
/// Returns true if and only if the given byte is allowed in a capture name.
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
@@ -182,13 +197,13 @@ mod tests {
($name:ident, $text:expr) => {
#[test]
fn $name() {
- assert_eq!(None, find_cap_ref($text));
+ assert_eq!(None, find_cap_ref($text.as_bytes()));
}
};
($name:ident, $text:expr, $capref:expr) => {
#[test]
fn $name() {
- assert_eq!(Some($capref), find_cap_ref($text));
+ assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
}
};
}
@@ -204,7 +219,8 @@ mod tests {
find!(find_cap_ref3, "$0", c!(0, 2));
find!(find_cap_ref4, "$5", c!(5, 2));
find!(find_cap_ref5, "$10", c!(10, 3));
- // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers
+ // See https://github.com/rust-lang/regex/pull/585
+ // for more on characters following numbers
find!(find_cap_ref6, "$42a", c!("42a", 4));
find!(find_cap_ref7, "${42}a", c!(42, 5));
find!(find_cap_ref8, "${42");
@@ -217,4 +233,6 @@ mod tests {
find!(find_cap_ref15, "$1_$2", c!("1_", 3));
find!(find_cap_ref16, "$x-$y", c!("x", 2));
find!(find_cap_ref17, "$x_$y", c!("x_", 3));
+ find!(find_cap_ref18, "${#}", c!("#", 4));
+ find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
}
diff --git a/src/lib.rs b/src/lib.rs
index e0a0975..bdcebd4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -365,7 +365,7 @@ $ the end of text (or end-of-line with multi-line mode)
<pre class="rust">
(exp) numbered capture group (indexed by opening parenthesis)
-(?P&lt;name&gt;exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
+(?P&lt;name&gt;exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
(?:exp) non-capturing group
(?flags) set flags within current group
(?flags:exp) set flags for exp (non-capturing)
@@ -562,7 +562,7 @@ All features below are enabled by default.
[Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
* **unicode-gencat** -
Provide the data for
- [Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+ [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
This includes, but is not limited to, `Decimal_Number`, `Letter`,
`Math_Symbol`, `Number` and `Punctuation`.
* **unicode-perl** -
@@ -731,8 +731,8 @@ Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that
matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when
enabled.
-6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value.
-When the `s` flag is enabled, `.` matches any byte.
+6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
+`s` flag is additionally enabled, `.` matches any byte.
# Performance
diff --git a/src/pikevm.rs b/src/pikevm.rs
index c106c76..299087d 100644
--- a/src/pikevm.rs
+++ b/src/pikevm.rs
@@ -8,7 +8,7 @@
//
// It can do more than the DFA can (specifically, record capture locations
// and execute Unicode word boundary assertions), but at a slower speed.
-// Specifically, the Pike VM exectues a DFA implicitly by repeatedly expanding
+// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding
// epsilon transitions. That is, the Pike VM engine can be in multiple states
// at once whereas the DFA is only ever in one state at a time.
//
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index 69f0b33..ca01e0e 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -119,7 +119,8 @@ impl Regex {
RegexBuilder::new(re).build()
}
- /// Returns true if and only if the regex matches the string given.
+ /// Returns true if and only if there is a match for the regex in the
+ /// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
@@ -930,17 +931,22 @@ impl<'t> Captures<'t> {
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
+ /// `name` may be an integer corresponding to the index of the capture
+ /// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
- /// The longest possible name is used. e.g., `$1a` looks up the capture
- /// group named `1a` and not the capture group at index `1`. To exert more
- /// precise control over the name, use braces, e.g., `${1}a`.
+ /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+ /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+ /// capture group at index `1`. To exert more precise control over the
+ /// name, or to refer to a capture group name that uses characters outside
+ /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+ /// using braces, any sequence of valid UTF-8 bytes is permitted. If the
+ /// sequence does not refer to a capture group name in the corresponding
+ /// regex, then it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
@@ -1051,6 +1057,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
+#[derive(Clone)]
pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
diff --git a/src/re_set.rs b/src/re_set.rs
index fc2b61a..b8954be 100644
--- a/src/re_set.rs
+++ b/src/re_set.rs
@@ -96,6 +96,19 @@ impl RegexSet {
RegexSetBuilder::new(exprs).build()
}
+ /// Create a new empty regex set.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use regex::RegexSet;
+ /// let set = RegexSet::empty();
+ /// assert!(set.is_empty());
+ /// ```
+ pub fn empty() -> RegexSet {
+ RegexSetBuilder::new(&[""; 0]).build().unwrap()
+ }
+
/// Returns true if and only if one of the regexes in this set matches
/// the text given.
///
@@ -207,6 +220,11 @@ impl RegexSet {
self.0.regex_strings().len()
}
+ /// Returns `true` if this set contains no regular expressions.
+ pub fn is_empty(&self) -> bool {
+ self.0.regex_strings().is_empty()
+ }
+
/// Returns the patterns that this set will match on.
///
/// This function can be used to determine the pattern for a match. The
diff --git a/src/re_trait.rs b/src/re_trait.rs
index b56804e..d14a9f7 100644
--- a/src/re_trait.rs
+++ b/src/re_trait.rs
@@ -51,6 +51,7 @@ impl Locations {
/// Positions are byte indices in terms of the original string matched.
///
/// `'c` is the lifetime of the captures.
+#[derive(Clone)]
pub struct SubCapturesPosIter<'c> {
idx: usize,
locs: &'c Locations,
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index b746599..ea95c1b 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -175,7 +175,8 @@ impl Regex {
RegexBuilder::new(re).build()
}
- /// Returns true if and only if the regex matches the string given.
+ /// Returns true if and only if there is a match for the regex in the
+ /// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
@@ -947,17 +948,22 @@ impl<'t> Captures<'t> {
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
+ /// `name` may be an integer corresponding to the index of the capture
+ /// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
- /// The longest possible name is used. e.g., `$1a` looks up the capture
- /// group named `1a` and not the capture group at index `1`. To exert more
- /// precise control over the name, use braces, e.g., `${1}a`.
+ /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+ /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+ /// capture group at index `1`. To exert more precise control over the
+ /// name, or to refer to a capture group name that uses characters outside
+ /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+ /// using braces, any sequence of characters is permitted. If the sequence
+ /// does not refer to a capture group name in the corresponding regex, then
+ /// it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &str, dst: &mut String) {
@@ -1053,6 +1059,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
+#[derive(Clone)]
pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
@@ -1122,7 +1129,7 @@ pub trait Replacer {
/// have a match at capture group `0`.
///
/// For example, a no-op replacement would be
- /// `dst.extend(caps.get(0).unwrap().as_str())`.
+ /// `dst.push_str(caps.get(0).unwrap().as_str())`.
fn replace_append(&mut self, caps: &Captures, dst: &mut String);
/// Return a fixed unchanging replacement string.
diff --git a/tests/api.rs b/tests/api.rs
index 0d4962c..c7250a8 100644
--- a/tests/api.rs
+++ b/tests/api.rs
@@ -195,6 +195,18 @@ expand!(
);
expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", "");
+expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%");
+expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc[");
+expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{");
+expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}");
+expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%");
+expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%");
+expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc[");
+expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "[");
+expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "[");
+expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "[");
+expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "[");
+
split!(
split1,
r"(?-u)\s+",
diff --git a/tests/set.rs b/tests/set.rs
index 648feec..37fcf87 100644
--- a/tests/set.rs
+++ b/tests/set.rs
@@ -54,3 +54,14 @@ fn get_set_patterns() {
let set = regex_set!(&["a", "b"]);
assert_eq!(vec!["a", "b"], set.patterns());
}
+
+#[test]
+fn len_and_empty() {
+ let empty = regex_set!(&[""; 0]);
+ assert_eq!(empty.len(), 0);
+ assert!(empty.is_empty());
+
+ let not_empty = regex_set!(&["ab", "b"]);
+ assert_eq!(not_empty.len(), 2);
+ assert!(!not_empty.is_empty());
+}
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 52522f4..9f1cd0c 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -74,6 +74,9 @@ mat!(
Some((0, 3))
);
mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
+// See: https://github.com/rust-lang/regex/issues/719
+mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
mat!(
uni_class_gencat_initial_punctuation,
r"\p{Initial_Punctuation}",