about | summary | refs | log | tree | commit | diff
path: root/src/expand.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/expand.rs')
-rw-r--r-- src/expand.rs 50
1 files changed, 34 insertions, 16 deletions
diff --git a/src/expand.rs b/src/expand.rs
index 528f55e..fd2ab03 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -24,7 +24,7 @@ pub fn expand_str(
continue;
}
debug_assert!(!replacement.is_empty());
- let cap_ref = match find_cap_ref(replacement) {
+ let cap_ref = match find_cap_ref(replacement.as_bytes()) {
Some(cap_ref) => cap_ref,
None => {
dst.push_str("$");
@@ -125,19 +125,15 @@ impl From<usize> for Ref<'static> {
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
-fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
- replacement: &T,
-) -> Option<CaptureRef> {
+fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
let mut i = 0;
let rep: &[u8] = replacement.as_ref();
if rep.len() <= 1 || rep[0] != b'$' {
return None;
}
- let mut brace = false;
i += 1;
if rep[i] == b'{' {
- brace = true;
- i += 1;
+ return find_cap_ref_braced(rep, i + 1);
}
let mut cap_end = i;
while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
@@ -151,12 +147,6 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
// check with either unsafe or by parsing the number straight from &[u8].
let cap =
str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
- if brace {
- if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
- return None;
- }
- cap_end += 1;
- }
Some(CaptureRef {
cap: match cap.parse::<u32>() {
Ok(i) => Ref::Number(i as usize),
@@ -166,6 +156,31 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
})
}
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> {
+ let start = i;
+ while rep.get(i).map_or(false, |&b| b != b'}') {
+ i += 1;
+ }
+ if !rep.get(i).map_or(false, |&b| b == b'}') {
+ return None;
+ }
+ // When looking at braced names, we don't put any restrictions on the name,
+ // so it's possible it could be invalid UTF-8. But a capture group name
+ // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
+ // safely return None.
+ let cap = match str::from_utf8(&rep[start..i]) {
+ Err(_) => return None,
+ Ok(cap) => cap,
+ };
+ Some(CaptureRef {
+ cap: match cap.parse::<u32>() {
+ Ok(i) => Ref::Number(i as usize),
+ Err(_) => Ref::Named(cap),
+ },
+ end: i + 1,
+ })
+}
+
/// Returns true if and only if the given byte is allowed in a capture name.
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
@@ -182,13 +197,13 @@ mod tests {
($name:ident, $text:expr) => {
#[test]
fn $name() {
- assert_eq!(None, find_cap_ref($text));
+ assert_eq!(None, find_cap_ref($text.as_bytes()));
}
};
($name:ident, $text:expr, $capref:expr) => {
#[test]
fn $name() {
- assert_eq!(Some($capref), find_cap_ref($text));
+ assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
}
};
}
@@ -204,7 +219,8 @@ mod tests {
find!(find_cap_ref3, "$0", c!(0, 2));
find!(find_cap_ref4, "$5", c!(5, 2));
find!(find_cap_ref5, "$10", c!(10, 3));
- // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers
+ // See https://github.com/rust-lang/regex/pull/585
+ // for more on characters following numbers
find!(find_cap_ref6, "$42a", c!("42a", 4));
find!(find_cap_ref7, "${42}a", c!(42, 5));
find!(find_cap_ref8, "${42");
@@ -217,4 +233,6 @@ mod tests {
find!(find_cap_ref15, "$1_$2", c!("1_", 3));
find!(find_cap_ref16, "$x-$y", c!("x", 2));
find!(find_cap_ref17, "$x_$y", c!("x_", 3));
+ find!(find_cap_ref18, "${#}", c!("#", 4));
+ find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
}