diff options
Diffstat (limited to 'src/expand.rs')
-rw-r--r-- | src/expand.rs | 50 |
1 files changed, 34 insertions, 16 deletions
diff --git a/src/expand.rs b/src/expand.rs index 528f55e..fd2ab03 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -24,7 +24,7 @@ pub fn expand_str( continue; } debug_assert!(!replacement.is_empty()); - let cap_ref = match find_cap_ref(replacement) { + let cap_ref = match find_cap_ref(replacement.as_bytes()) { Some(cap_ref) => cap_ref, None => { dst.push_str("$"); @@ -125,19 +125,15 @@ impl From<usize> for Ref<'static> { /// starting at the beginning of `replacement`. /// /// If no such valid reference could be found, None is returned. -fn find_cap_ref<T: ?Sized + AsRef<[u8]>>( - replacement: &T, -) -> Option<CaptureRef> { +fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> { let mut i = 0; let rep: &[u8] = replacement.as_ref(); if rep.len() <= 1 || rep[0] != b'$' { return None; } - let mut brace = false; i += 1; if rep[i] == b'{' { - brace = true; - i += 1; + return find_cap_ref_braced(rep, i + 1); } let mut cap_end = i; while rep.get(cap_end).map_or(false, is_valid_cap_letter) { @@ -151,12 +147,6 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>( // check with either unsafe or by parsing the number straight from &[u8]. let cap = str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); - if brace { - if !rep.get(cap_end).map_or(false, |&b| b == b'}') { - return None; - } - cap_end += 1; - } Some(CaptureRef { cap: match cap.parse::<u32>() { Ok(i) => Ref::Number(i as usize), @@ -166,6 +156,31 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>( }) } +fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> { + let start = i; + while rep.get(i).map_or(false, |&b| b != b'}') { + i += 1; + } + if !rep.get(i).map_or(false, |&b| b == b'}') { + return None; + } + // When looking at braced names, we don't put any restrictions on the name, + // so it's possible it could be invalid UTF-8. But a capture group name + // can never be invalid UTF-8, so if we have invalid UTF-8, then we can + // safely return None. + let cap = match str::from_utf8(&rep[start..i]) { + Err(_) => return None, + Ok(cap) => cap, + }; + Some(CaptureRef { + cap: match cap.parse::<u32>() { + Ok(i) => Ref::Number(i as usize), + Err(_) => Ref::Named(cap), + }, + end: i + 1, + }) +} + /// Returns true if and only if the given byte is allowed in a capture name. fn is_valid_cap_letter(b: &u8) -> bool { match *b { @@ -182,13 +197,13 @@ mod tests { ($name:ident, $text:expr) => { #[test] fn $name() { - assert_eq!(None, find_cap_ref($text)); + assert_eq!(None, find_cap_ref($text.as_bytes())); } }; ($name:ident, $text:expr, $capref:expr) => { #[test] fn $name() { - assert_eq!(Some($capref), find_cap_ref($text)); + assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); } }; } @@ -204,7 +219,8 @@ mod tests { find!(find_cap_ref3, "$0", c!(0, 2)); find!(find_cap_ref4, "$5", c!(5, 2)); find!(find_cap_ref5, "$10", c!(10, 3)); - // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers + // See https://github.com/rust-lang/regex/pull/585 + // for more on characters following numbers find!(find_cap_ref6, "$42a", c!("42a", 4)); find!(find_cap_ref7, "${42}a", c!(42, 5)); find!(find_cap_ref8, "${42"); @@ -217,4 +233,6 @@ mod tests { find!(find_cap_ref15, "$1_$2", c!("1_", 3)); find!(find_cap_ref16, "$x-$y", c!("x", 2)); find!(find_cap_ref17, "$x_$y", c!("x_", 3)); + find!(find_cap_ref18, "${#}", c!("#", 4)); + find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); } |