diff options
Diffstat (limited to 'src/protocol/common/hex.rs')
-rw-r--r-- | src/protocol/common/hex.rs | 190 |
1 files changed, 114 insertions, 76 deletions
diff --git a/src/protocol/common/hex.rs b/src/protocol/common/hex.rs index 4c3170b..ade9032 100644 --- a/src/protocol/common/hex.rs +++ b/src/protocol/common/hex.rs @@ -8,7 +8,7 @@ pub enum DecodeHexError { InvalidOutput, } -/// Decode a GDB dex string into the specified integer. +/// Decode a GDB hex string into the specified integer. /// /// GDB hex strings may include "xx", which represent "missing" data. This /// method simply treats "xx" as 0x00. @@ -35,7 +35,7 @@ where Ok(result) } -/// Wrapper around a raw hex string. Enabled "late" calls to `decode` from +/// Wrapper around a raw hex string. Enables "late" calls to `decode` from /// outside the `crate::protocol` module. #[derive(Debug, Clone, Copy)] pub struct HexString<'a>(pub &'a [u8]); @@ -54,6 +54,7 @@ pub enum DecodeHexBufError { NotAscii, } +#[inline] fn ascii2byte(c: u8) -> Option<u8> { match c { b'0'..=b'9' => Some(c - b'0'), @@ -64,9 +65,10 @@ fn ascii2byte(c: u8) -> Option<u8> { } } -/// Check if the byte `c` is a valid GDB hex digit `[0-9][a-f][A-F][xX]` -#[allow(clippy::match_like_matches_macro)] +/// Check if the byte `c` is a valid GDB hex digit `[0-9a-fA-FxX]` +#[inline] pub fn is_hex(c: u8) -> bool { + #[allow(clippy::match_like_matches_macro)] // mirror ascii2byte match c { b'0'..=b'9' => true, b'a'..=b'f' => true, @@ -81,66 +83,115 @@ pub fn is_hex(c: u8) -> bool { /// GDB hex strings may include "xx", which represent "missing" data. This /// method simply treats "xx" as 0x00. // TODO: maybe don't blindly translate "xx" as 0x00? -// TODO: rewrite this method to elide bound checks +#[cfg(not(feature = "paranoid_unsafe"))] pub fn decode_hex_buf(base_buf: &mut [u8]) -> Result<&mut [u8], DecodeHexBufError> { use DecodeHexBufError::*; + if base_buf.is_empty() { + return Ok(&mut []); + } + let odd_adust = base_buf.len() % 2; if odd_adust != 0 { base_buf[0] = ascii2byte(base_buf[0]).ok_or(NotAscii)?; } + let buf = &mut base_buf[odd_adust..]; let decoded_len = buf.len() / 2; for i in 0..decoded_len { - let b = ascii2byte(buf[i * 2]).ok_or(NotAscii)? << 4 - | ascii2byte(buf[i * 2 + 1]).ok_or(NotAscii)?; - buf[i] = b as u8; - } + // SAFETY: rustc isn't smart enough to automatically elide these bound checks. + // + // If buf.len() == 0 or 1: trivially safe, since the for block is never taken + // If buf.len() >= 2: the range of values for `i` is 0..(buf.len() / 2 - 1) + let (hi, lo, b) = unsafe { + ( + // (buf.len() / 2 - 1) * 2 + // == (buf.len() - 2) + // since buf.len() is >2, this is in-bounds + *buf.get_unchecked(i * 2), + // (buf.len() / 2 - 1) * 2 + 1 + // == (buf.len() - 1) + // since buf.len() is >2, this is in-bounds + *buf.get_unchecked(i * 2 + 1), + // since buf.len() is >2, (buf.len() / 2 - 1) is always in-bounds + buf.get_unchecked_mut(i), + ) + }; - Ok(&mut base_buf[..decoded_len + odd_adust]) -} + let hi = ascii2byte(hi).ok_or(NotAscii)?; + let lo = ascii2byte(lo).ok_or(NotAscii)?; + *b = hi << 4 | lo; + } -#[allow(dead_code)] -#[derive(Debug)] -pub enum EncodeHexBufError { - SmallBuffer, + // SAFETY: rustc isn't smart enough to automatically elide this bound check. + // + // Consider the different values (decoded_len + odd_adust) can take: + // + // buf.len() | (decoded_len + odd_adust) + // -----------|--------------------------- + // 0 | (0 + 0) == 0 + // 1 | (0 + 1) == 1 + // 2 | (1 + 0) == 1 + // 3 | (1 + 1) == 2 + // 4 | (2 + 0) == 2 + // 5 | (2 + 1) == 3 + // + // Note that the computed index is always in-bounds. + // + // If I were still in undergrad, I could probably have whipped up a proper + // mathematical proof by induction or whatnot, but hopefully this "proof by + // example" ought to suffice. + unsafe { Ok(base_buf.get_unchecked_mut(..decoded_len + odd_adust)) } } -/// Encode a GDB hex string into a byte slice _in place_. +/// Decode a GDB hex string into a byte slice _in place_. /// -/// The data to be encoded should be copied into the buffer from -/// `buf[start_idx..]`. The buffer itself must be at least `data.len() * 2` -/// bytes in size, as each byte is expanded into a two byte hex string. -#[allow(dead_code)] -pub fn encode_hex_buf(buf: &mut [u8], start_idx: usize) -> Result<&mut [u8], EncodeHexBufError> { - use EncodeHexBufError::*; +/// GDB hex strings may include "xx", which represent "missing" data. This +/// method simply treats "xx" as 0x00. +// TODO: maybe don't blindly translate "xx" as 0x00? +#[cfg(feature = "paranoid_unsafe")] +pub fn decode_hex_buf(base_buf: &mut [u8]) -> Result<&mut [u8], DecodeHexBufError> { + use DecodeHexBufError::*; - let len = buf.len() - start_idx; - let encoded_len = len * 2; + let odd_adust = base_buf.len() % 2; + if odd_adust != 0 { + base_buf[0] = ascii2byte(base_buf[0]).ok_or(NotAscii)?; + } + let buf = &mut base_buf[odd_adust..]; - if buf.len() < encoded_len { - return Err(SmallBuffer); + let decoded_len = buf.len() / 2; + for i in 0..decoded_len { + let b = ascii2byte(buf[i * 2]).ok_or(NotAscii)? << 4 + | ascii2byte(buf[i * 2 + 1]).ok_or(NotAscii)?; + buf[i] = b; } - for i in 0..encoded_len { - let byte = buf[start_idx + i / 2]; - let nybble = if i % 2 == 0 { - // high - (byte & 0xf0) >> 4 - } else { - // low - byte & 0x0f - }; + Ok(&mut base_buf[..decoded_len + odd_adust]) +} - buf[i] = match nybble { - 0x0..=0x9 => b'0' + nybble, - 0xa..=0xf => b'A' + (nybble - 0xa), - _ => unreachable!(), // could be unreachable_unchecked... - }; +/// Decode GDB escaped binary bytes into origin bytes _in place_. +// +// Thanks reddit! +// https://www.reddit.com/r/rust/comments/110qzq9/any_idea_why_rust_isnt_able_to_elide_this_bounds/ +pub fn decode_bin_buf(buf: &mut [u8]) -> Option<&mut [u8]> { + let mut i = 0; + let len = buf.len(); + for j in 0..len { + if i >= len { + return Some(&mut buf[..j]); + } + + if buf[i] == b'}' { + buf[j] = buf.get(i + 1)? ^ 0x20; + i += 1; + } else { + buf[j] = buf[i]; + } + i += 1; } - Ok(&mut buf[..encoded_len]) + Some(buf) } #[cfg(test)] @@ -148,40 +199,6 @@ mod tests { use super::*; #[test] - fn encode_hex_simple() { - let payload = [0xde, 0xad, 0xbe, 0xef]; - let mut buf = [0; 16]; - - let start_idx = buf.len() - payload.len(); - - // copy the payload into the buffer - buf[start_idx..].copy_from_slice(&payload); - let out = encode_hex_buf(&mut buf, start_idx).unwrap(); - - assert_eq!(out, b"DEADBEEF"); - } - - #[test] - fn encode_hex_in_chunks() { - let payload = (0..=255).collect::<Vec<u8>>(); - let mut out = Vec::new(); - - let mut buf = [0; 30]; - - for c in payload.chunks(15) { - let start_idx = buf.len() - c.len(); - - let data_buf = &mut buf[start_idx..]; - data_buf[..c.len()].copy_from_slice(c); - out.extend_from_slice(encode_hex_buf(&mut buf, start_idx).unwrap()); - } - - let expect = (0..=255).map(|b| format!("{:02X?}", b)).collect::<String>(); - - assert_eq!(out, expect.as_bytes()) - } - - #[test] fn decode_hex_buf_odd() { let mut payload = b"ffffff4".to_vec(); let res = decode_hex_buf(&mut payload).unwrap(); @@ -189,7 +206,14 @@ mod tests { } #[test] - fn decode_hex_buf_2() { + fn decode_hex_buf_even() { + let mut payload = b"0123456789abcdef".to_vec(); + let res = decode_hex_buf(&mut payload).unwrap(); + assert_eq!(res, [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef]); + } + + #[test] + fn decode_hex_buf_odd_alt() { let mut payload = b"12345".to_vec(); let res = decode_hex_buf(&mut payload).unwrap(); assert_eq!(res, [0x1, 0x23, 0x45]); @@ -201,4 +225,18 @@ mod tests { let res = decode_hex_buf(&mut payload).unwrap(); assert_eq!(res, [0x1]); } + + #[test] + fn decode_hex_buf_empty() { + let mut payload = b"".to_vec(); + let res = decode_hex_buf(&mut payload).unwrap(); + assert_eq!(res, []); + } + + #[test] + fn decode_bin_buf_escaped() { + let mut payload = b"}\x03}\x04}]}\n".to_vec(); + let res = decode_bin_buf(&mut payload).unwrap(); + assert_eq!(res, [0x23, 0x24, 0x7d, 0x2a]); + } } |