aboutsummaryrefslogtreecommitdiff
path: root/src/debug.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/debug.rs')
-rw-r--r--src/debug.rs80
1 files changed, 80 insertions, 0 deletions
diff --git a/src/debug.rs b/src/debug.rs
new file mode 100644
index 0000000..9882ea2
--- /dev/null
+++ b/src/debug.rs
@@ -0,0 +1,80 @@
+/// A type that provides a human readable debug impl for arbitrary bytes.
+///
+/// This generally works best when the bytes are presumed to be mostly UTF-8,
+/// but will work for anything.
+///
+/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
+pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
+
+impl<'a> core::fmt::Debug for Bytes<'a> {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(f, "\"")?;
+ // This is a sad re-implementation of a similar impl found in bstr.
+ let mut bytes = self.0;
+ while let Some(result) = utf8_decode(bytes) {
+ let ch = match result {
+ Ok(ch) => ch,
+ Err(byte) => {
+ write!(f, r"\x{:02x}", byte)?;
+ bytes = &bytes[1..];
+ continue;
+ }
+ };
+ bytes = &bytes[ch.len_utf8()..];
+ match ch {
+ '\0' => write!(f, "\\0")?,
+ // ASCII control characters except \0, \n, \r, \t
+ '\x01'..='\x08'
+ | '\x0b'
+ | '\x0c'
+ | '\x0e'..='\x19'
+ | '\x7f' => {
+ write!(f, "\\x{:02x}", u32::from(ch))?;
+ }
+ '\n' | '\r' | '\t' | _ => {
+ write!(f, "{}", ch.escape_debug())?;
+ }
+ }
+ }
+ write!(f, "\"")?;
+ Ok(())
+ }
+}
+
+/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
+///
+/// If no valid encoding of a codepoint exists at the beginning of the given
+/// byte slice, then the first byte is returned instead.
+///
+/// This returns `None` if and only if `bytes` is empty.
+pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
+ fn len(byte: u8) -> Option<usize> {
+ if byte <= 0x7F {
+ return Some(1);
+ } else if byte & 0b1100_0000 == 0b1000_0000 {
+ return None;
+ } else if byte <= 0b1101_1111 {
+ Some(2)
+ } else if byte <= 0b1110_1111 {
+ Some(3)
+ } else if byte <= 0b1111_0111 {
+ Some(4)
+ } else {
+ None
+ }
+ }
+
+ if bytes.is_empty() {
+ return None;
+ }
+ let len = match len(bytes[0]) {
+ None => return Some(Err(bytes[0])),
+ Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
+ Some(1) => return Some(Ok(char::from(bytes[0]))),
+ Some(len) => len,
+ };
+ match core::str::from_utf8(&bytes[..len]) {
+ Ok(s) => Some(Ok(s.chars().next().unwrap())),
+ Err(_) => Some(Err(bytes[0])),
+ }
+}