diff options
Diffstat (limited to 'src/text_format.rs')
-rw-r--r-- | src/text_format.rs | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/src/text_format.rs b/src/text_format.rs new file mode 100644 index 0000000..7ba3721 --- /dev/null +++ b/src/text_format.rs @@ -0,0 +1,328 @@ +//! Protobuf "text format" implementation. +//! +//! Text format message look like this: +//! +//! ```text,ignore +//! size: 17 +//! color: "red" +//! children { +//! size: 18 +//! color: "blue" +//! } +//! children { +//! size: 19 +//! color: "green" +//! } +//! ``` +//! +//! This format is not specified, but it is implemented by all official +//! protobuf implementations, including `protoc` command which can decode +//! and encode messages using text format. + +use core::Message; +use reflect::ReflectFieldRef; +use reflect::ReflectValueRef; +use std; +use std::fmt; +use std::fmt::Write; + +fn quote_bytes_to(bytes: &[u8], buf: &mut String) { + for &c in bytes { + match c { + b'\n' => buf.push_str(r"\n"), + b'\r' => buf.push_str(r"\r"), + b'\t' => buf.push_str(r"\t"), + b'"' => buf.push_str("\\\""), + b'\\' => buf.push_str(r"\\"), + b'\x20'..=b'\x7e' => buf.push(c as char), + _ => { + buf.push('\\'); + buf.push((b'0' + (c >> 6)) as char); + buf.push((b'0' + ((c >> 3) & 7)) as char); + buf.push((b'0' + (c & 7)) as char); + } + } + } +} + +fn quote_escape_bytes_to(bytes: &[u8], buf: &mut String) { + buf.push('"'); + quote_bytes_to(bytes, buf); + buf.push('"'); +} + +#[doc(hidden)] +pub fn quote_escape_bytes(bytes: &[u8]) -> String { + let mut r = String::new(); + quote_escape_bytes_to(bytes, &mut r); + r +} + +#[doc(hidden)] +pub fn unescape_string(string: &str) -> Vec<u8> { + fn parse_if_digit(chars: &mut std::str::Chars) -> u8 { + let mut copy = chars.clone(); + let f = match copy.next() { + None => return 0, + Some(f) => f, + }; + let d = match f { + '0'..='9' => (f as u8 - b'0'), + _ => return 0, + }; + *chars = copy; + d + } + + fn parse_hex_digit(chars: &mut std::str::Chars) -> u8 { + match chars.next().unwrap() { + c @ '0'..='9' => (c as u8) - b'0', + c @ 'a'..='f' => (c as u8) - b'a' + 10, + c @ 'A'..='F' => (c as u8) - b'A' + 10, + _ => panic!("incorrect hex escape"), + } + } + + fn parse_escape_rem(chars: &mut std::str::Chars) -> u8 { + let n = chars.next().unwrap(); + match n { + 'a' => return b'\x07', + 'b' => return b'\x08', + 'f' => return b'\x0c', + 'n' => return b'\n', + 'r' => return b'\r', + 't' => return b'\t', + 'v' => return b'\x0b', + '"' => return b'"', + '\'' => return b'\'', + '0'..='9' => { + let d1 = n as u8 - b'0'; + let d2 = parse_if_digit(chars); + let d3 = parse_if_digit(chars); + return (d1 * 64 + d2 * 8 + d3) as u8; + } + 'x' => { + let d1 = parse_hex_digit(chars); + let d2 = parse_hex_digit(chars); + return d1 * 16 + d2; + } + c => return c as u8, // TODO: validate ASCII + }; + } + + let mut chars = string.chars(); + let mut r = Vec::new(); + + loop { + let f = match chars.next() { + None => return r, + Some(f) => f, + }; + + if f == '\\' { + r.push(parse_escape_rem(&mut chars)); + } else { + r.push(f as u8); // TODO: escape UTF-8 + } + } +} + +fn print_str_to(s: &str, buf: &mut String) { + // TODO: keep printable Unicode + quote_escape_bytes_to(s.as_bytes(), buf); +} + +fn do_indent(buf: &mut String, pretty: bool, indent: usize) { + if pretty && indent > 0 { + for _ in 0..indent { + buf.push_str(" "); + } + } +} + +fn print_start_field( + buf: &mut String, + pretty: bool, + indent: usize, + first: &mut bool, + field_name: &str, +) { + if !*first && !pretty { + buf.push_str(" "); + } + do_indent(buf, pretty, indent); + *first = false; + buf.push_str(field_name); +} + +fn print_end_field(buf: &mut String, pretty: bool) { + if pretty { + buf.push_str("\n"); + } +} + +fn print_field( + buf: &mut String, + pretty: bool, + indent: usize, + first: &mut bool, + field_name: &str, + value: ReflectValueRef, +) { + print_start_field(buf, pretty, indent, first, field_name); + + match value { + ReflectValueRef::Message(m) => { + buf.push_str(" {"); + if pretty { + buf.push_str("\n"); + } + print_to_internal(m, buf, pretty, indent + 1); + do_indent(buf, pretty, indent); + buf.push_str("}"); + } + ReflectValueRef::Enum(e) => { + buf.push_str(": "); + buf.push_str(e.name()); + } + ReflectValueRef::String(s) => { + buf.push_str(": "); + print_str_to(s, buf); + } + ReflectValueRef::Bytes(b) => { + buf.push_str(": "); + quote_escape_bytes_to(b, buf); + } + ReflectValueRef::I32(v) => { + write!(buf, ": {}", v).unwrap(); + } + ReflectValueRef::I64(v) => { + write!(buf, ": {}", v).unwrap(); + } + ReflectValueRef::U32(v) => { + write!(buf, ": {}", v).unwrap(); + } + ReflectValueRef::U64(v) => { + write!(buf, ": {}", v).unwrap(); + } + ReflectValueRef::Bool(v) => { + write!(buf, ": {}", v).unwrap(); + } + ReflectValueRef::F32(v) => { + write!(buf, ": {}", v).unwrap(); + } + ReflectValueRef::F64(v) => { + write!(buf, ": {}", v).unwrap(); + } + } + + print_end_field(buf, pretty); +} + +fn print_to_internal(m: &Message, buf: &mut String, pretty: bool, indent: usize) { + let d = m.descriptor(); + let mut first = true; + for f in d.fields() { + match f.get_reflect(m) { + ReflectFieldRef::Map(map) => { + for (k, v) in map { + print_start_field(buf, pretty, indent, &mut first, f.name()); + buf.push_str(" {"); + if pretty { + buf.push_str("\n"); + } + + let mut entry_first = true; + + print_field(buf, pretty, indent + 1, &mut entry_first, "key", k.as_ref()); + print_field( + buf, + pretty, + indent + 1, + &mut entry_first, + "value", + v.as_ref(), + ); + do_indent(buf, pretty, indent); + buf.push_str("}"); + print_end_field(buf, pretty); + } + } + ReflectFieldRef::Repeated(repeated) => { + // TODO: do not print zeros for v3 + for v in repeated { + print_field(buf, pretty, indent, &mut first, f.name(), v.as_ref()); + } + } + ReflectFieldRef::Optional(optional) => { + if let Some(v) = optional { + print_field(buf, pretty, indent, &mut first, f.name(), v); + } + } + } + } + + // TODO: unknown fields +} + +/// Text-format +pub fn print_to(m: &Message, buf: &mut String) { + print_to_internal(m, buf, false, 0) +} + +fn print_to_string_internal(m: &Message, pretty: bool) -> String { + let mut r = String::new(); + print_to_internal(m, &mut r, pretty, 0); + r.to_string() +} + +/// Text-format +pub fn print_to_string(m: &Message) -> String { + print_to_string_internal(m, false) +} + +/// Text-format to `fmt::Formatter`. +pub fn fmt(m: &Message, f: &mut fmt::Formatter) -> fmt::Result { + let pretty = f.alternate(); + f.write_str(&print_to_string_internal(m, pretty)) +} + +#[cfg(test)] +mod test { + + fn escape(data: &[u8]) -> String { + let mut s = String::with_capacity(data.len() * 4); + super::quote_bytes_to(data, &mut s); + s + } + + fn test_escape_unescape(text: &str, escaped: &str) { + assert_eq!(text.as_bytes(), &super::unescape_string(escaped)[..]); + assert_eq!(escaped, &escape(text.as_bytes())[..]); + } + + #[test] + fn test_print_to_bytes() { + assert_eq!("ab", escape(b"ab")); + assert_eq!("a\\\\023", escape(b"a\\023")); + assert_eq!("a\\r\\n\\t '\\\"\\\\", escape(b"a\r\n\t '\"\\")); + assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes())); + } + + #[test] + fn test_unescape_string() { + test_escape_unescape("", ""); + test_escape_unescape("aa", "aa"); + test_escape_unescape("\n", "\\n"); + test_escape_unescape("\r", "\\r"); + test_escape_unescape("\t", "\\t"); + test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275"); + // hex + assert_eq!(b"aaa\x01bbb", &super::unescape_string("aaa\\x01bbb")[..]); + assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xCDbbb")[..]); + assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xCDbbb")[..]); + // quotes + assert_eq!(b"aaa\"bbb", &super::unescape_string("aaa\\\"bbb")[..]); + assert_eq!(b"aaa\'bbb", &super::unescape_string("aaa\\\'bbb")[..]); + } +} |