aboutsummaryrefslogtreecommitdiff
path: root/src/text_format.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/text_format.rs')
-rw-r--r--src/text_format.rs328
1 files changed, 328 insertions, 0 deletions
diff --git a/src/text_format.rs b/src/text_format.rs
new file mode 100644
index 0000000..7ba3721
--- /dev/null
+++ b/src/text_format.rs
@@ -0,0 +1,328 @@
+//! Protobuf "text format" implementation.
+//!
+//! Text format messages look like this:
+//!
+//! ```text,ignore
+//! size: 17
+//! color: "red"
+//! children {
+//! size: 18
+//! color: "blue"
+//! }
+//! children {
+//! size: 19
+//! color: "green"
+//! }
+//! ```
+//!
+//! This format is not specified, but it is implemented by all official
+//! protobuf implementations, including the `protoc` command, which can
+//! decode and encode messages using text format.
+
+use core::Message;
+use reflect::ReflectFieldRef;
+use reflect::ReflectValueRef;
+use std;
+use std::fmt;
+use std::fmt::Write;
+
+/// Appends `bytes` to `buf` in escaped form, without surrounding quotes.
+///
+/// Tab, line feed, carriage return, double quote and backslash get their
+/// two-character backslash escapes; any other byte in `0x20..=0x7e` is copied
+/// as-is; every remaining byte is written as a backslash followed by exactly
+/// three octal digits.
+fn quote_bytes_to(bytes: &[u8], buf: &mut String) {
+    for &byte in bytes {
+        // Two-character escape for this byte, or "" when it has none.
+        let short_escape = match byte {
+            b'\t' => r"\t",
+            b'\n' => r"\n",
+            b'\r' => r"\r",
+            b'"' => r#"\""#,
+            b'\\' => r"\\",
+            _ => "",
+        };
+
+        if !short_escape.is_empty() {
+            buf.push_str(short_escape);
+        } else if byte >= 0x20 && byte <= 0x7e {
+            buf.push(char::from(byte));
+        } else {
+            // A `u8` is at most 0o377, so this is always three digits.
+            write!(buf, "\\{:03o}", byte).unwrap();
+        }
+    }
+}
+
+/// Appends `bytes` to `buf` as a double-quoted literal: an opening `"`, the
+/// escaped bytes produced by `quote_bytes_to`, then a closing `"`.
+fn quote_escape_bytes_to(bytes: &[u8], buf: &mut String) {
+    const QUOTE: char = '"';
+
+    buf.push(QUOTE);
+    quote_bytes_to(bytes, buf);
+    buf.push(QUOTE);
+}
+
+/// Returns `bytes` as a freshly allocated, double-quoted and escaped `String`.
+///
+/// Thin wrapper around `quote_escape_bytes_to` that owns the output buffer.
+#[doc(hidden)]
+pub fn quote_escape_bytes(bytes: &[u8]) -> String {
+    let mut quoted = String::new();
+    quote_escape_bytes_to(bytes, &mut quoted);
+    quoted
+}
+
+/// Decodes the body of a text-format string literal (without the surrounding
+/// quotes) into raw bytes.
+///
+/// Recognized escapes after a backslash:
+///
+/// * `a`, `b`, `f`, `n`, `r`, `t`, `v`, `"` and `'`: the usual C-style bytes;
+/// * one to three octal digits (`0`-`7`): the byte with that value, where
+///   values above `0o377` keep only their low eight bits;
+/// * `x` followed by exactly two hex digits: the byte with that value;
+/// * any other character: the character itself (so `\\` yields `\`).
+///
+/// Characters outside escapes are emitted as their UTF-8 encoding.
+///
+/// # Panics
+///
+/// Panics if `string` ends with a lone backslash, or if `\x` is not followed
+/// by two hex digits.
+#[doc(hidden)]
+pub fn unescape_string(string: &str) -> Vec<u8> {
+    // Consumes the next char and returns its value if it is an octal digit;
+    // otherwise leaves `chars` untouched.
+    fn take_octal_digit(chars: &mut std::str::Chars) -> Option<u32> {
+        let mut lookahead = chars.clone();
+        let digit = lookahead.next()?.to_digit(8)?;
+        *chars = lookahead;
+        Some(digit)
+    }
+
+    // Consumes one mandatory hex digit and returns its value.
+    fn take_hex_digit(chars: &mut std::str::Chars) -> u8 {
+        match chars.next().and_then(|c| c.to_digit(16)) {
+            // `to_digit(16)` is at most 15, so the cast is lossless.
+            Some(digit) => digit as u8,
+            None => panic!("incorrect hex escape"),
+        }
+    }
+
+    // Appends the UTF-8 encoding of `c` instead of truncating it to one byte.
+    fn push_char(out: &mut Vec<u8>, c: char) {
+        let mut utf8 = [0u8; 4];
+        out.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
+    }
+
+    let mut chars = string.chars();
+    // The decoded form is never longer than the escaped input.
+    let mut out = Vec::with_capacity(string.len());
+
+    // `chars` is also advanced inside the body, so a `for` loop cannot be used.
+    while let Some(c) = chars.next() {
+        if c != '\\' {
+            push_char(&mut out, c);
+            continue;
+        }
+
+        let escape = chars
+            .next()
+            .expect("incomplete escape sequence at end of string");
+        match escape {
+            'a' => out.push(b'\x07'),
+            'b' => out.push(b'\x08'),
+            'f' => out.push(b'\x0c'),
+            'n' => out.push(b'\n'),
+            'r' => out.push(b'\r'),
+            't' => out.push(b'\t'),
+            'v' => out.push(b'\x0b'),
+            '"' => out.push(b'"'),
+            '\'' => out.push(b'\''),
+            '0'..='7' => {
+                // Accumulate left to right so short escapes such as `\1` or
+                // `\12` get their real value rather than being weighted as
+                // if three digits were present.
+                let mut code = escape as u32 - '0' as u32;
+                for _ in 0..2 {
+                    match take_octal_digit(&mut chars) {
+                        Some(digit) => code = code * 8 + digit,
+                        None => break,
+                    }
+                }
+                // Up to 0o777 can be written; keep the low byte explicitly
+                // instead of overflowing `u8` arithmetic.
+                out.push((code & 0xff) as u8);
+            }
+            'x' => {
+                let hi = take_hex_digit(&mut chars);
+                let lo = take_hex_digit(&mut chars);
+                out.push(hi * 16 + lo);
+            }
+            other => push_char(&mut out, other),
+        }
+    }
+
+    out
+}
+
+/// Appends `s` to `buf` as a quoted, escaped text-format string literal.
+///
+/// Escaping is applied to the UTF-8 bytes of `s`.
+fn print_str_to(s: &str, buf: &mut String) {
+    // TODO: keep printable Unicode
+    let utf8 = s.as_bytes();
+    quote_escape_bytes_to(utf8, buf);
+}
+
+/// Appends `indent` copies of the indentation unit to `buf` when `pretty` is
+/// set; does nothing otherwise.
+fn do_indent(buf: &mut String, pretty: bool, indent: usize) {
+    if !pretty {
+        return;
+    }
+    // An `indent` of zero is simply an empty range.
+    (0..indent).for_each(|_| buf.push_str(" "));
+}
+
+/// Writes everything that precedes a field's value: the separator from the
+/// previous field (compact output only), the indentation (pretty output
+/// only) and the field name.
+///
+/// `first` tracks whether a field has already been written at this nesting
+/// level; it is cleared here.
+fn print_start_field(
+    buf: &mut String,
+    pretty: bool,
+    indent: usize,
+    first: &mut bool,
+    field_name: &str,
+) {
+    // Only compact output needs an explicit separator between fields.
+    let needs_separator = !(*first || pretty);
+    if needs_separator {
+        buf.push_str(" ");
+    }
+    *first = false;
+
+    do_indent(buf, pretty, indent);
+    buf.push_str(field_name);
+}
+
+/// Terminates a field: pretty output ends it with a newline, compact output
+/// appends nothing.
+fn print_end_field(buf: &mut String, pretty: bool) {
+    let terminator = if pretty { "\n" } else { "" };
+    buf.push_str(terminator);
+}
+
+/// Writes one complete field (name, value and terminator) to `buf`.
+///
+/// Nested messages are rendered as `name { ... }` by recursing one
+/// indentation level deeper; every other value kind is rendered as
+/// `name: value`. Strings and bytes are quoted and escaped, enums are
+/// written by name, and numbers and booleans use their `Display` form.
+fn print_field(
+    buf: &mut String,
+    pretty: bool,
+    indent: usize,
+    first: &mut bool,
+    field_name: &str,
+    value: ReflectValueRef,
+) {
+    // Shared rendering for every value that is printed via `Display`.
+    fn push_scalar<T: fmt::Display>(buf: &mut String, scalar: T) {
+        write!(buf, ": {}", scalar).unwrap();
+    }
+
+    print_start_field(buf, pretty, indent, first, field_name);
+
+    match value {
+        ReflectValueRef::Message(nested) => {
+            buf.push_str(if pretty { " {\n" } else { " {" });
+            print_to_internal(nested, buf, pretty, indent + 1);
+            // The closing brace lines up with the field name.
+            do_indent(buf, pretty, indent);
+            buf.push('}');
+        }
+        ReflectValueRef::Enum(variant) => {
+            buf.push_str(": ");
+            buf.push_str(variant.name());
+        }
+        ReflectValueRef::String(text) => {
+            buf.push_str(": ");
+            print_str_to(text, buf);
+        }
+        ReflectValueRef::Bytes(raw) => {
+            buf.push_str(": ");
+            quote_escape_bytes_to(raw, buf);
+        }
+        ReflectValueRef::I32(n) => push_scalar(buf, n),
+        ReflectValueRef::I64(n) => push_scalar(buf, n),
+        ReflectValueRef::U32(n) => push_scalar(buf, n),
+        ReflectValueRef::U64(n) => push_scalar(buf, n),
+        ReflectValueRef::Bool(flag) => push_scalar(buf, flag),
+        ReflectValueRef::F32(x) => push_scalar(buf, x),
+        ReflectValueRef::F64(x) => push_scalar(buf, x),
+    }
+
+    print_end_field(buf, pretty);
+}
+
+/// Writes every field of `m` to `buf`, in the order returned by the
+/// descriptor's `fields()`, at nesting level `indent`.
+///
+/// Optional fields are written only when a value is present, repeated fields
+/// once per element, and map fields as one `name { key: ... value: ... }`
+/// group per entry. Unknown fields are not printed.
+fn print_to_internal(m: &Message, buf: &mut String, pretty: bool, indent: usize) {
+    let descriptor = m.descriptor();
+    // Shared by all fields of this message so compact output is separated
+    // correctly.
+    let mut first = true;
+
+    for field in descriptor.fields() {
+        match field.get_reflect(m) {
+            ReflectFieldRef::Optional(optional) => {
+                if let Some(value) = optional {
+                    print_field(buf, pretty, indent, &mut first, field.name(), value);
+                }
+            }
+            ReflectFieldRef::Repeated(repeated) => {
+                // TODO: do not print zeros for v3
+                for item in repeated {
+                    print_field(buf, pretty, indent, &mut first, field.name(), item.as_ref());
+                }
+            }
+            ReflectFieldRef::Map(map) => {
+                for (key, value) in map {
+                    print_start_field(buf, pretty, indent, &mut first, field.name());
+                    buf.push_str(if pretty { " {\n" } else { " {" });
+
+                    // `key` and `value` are tracked separately from the
+                    // enclosing message's fields.
+                    let mut entry_first = true;
+                    let entry_indent = indent + 1;
+
+                    print_field(
+                        buf,
+                        pretty,
+                        entry_indent,
+                        &mut entry_first,
+                        "key",
+                        key.as_ref(),
+                    );
+                    print_field(
+                        buf,
+                        pretty,
+                        entry_indent,
+                        &mut entry_first,
+                        "value",
+                        value.as_ref(),
+                    );
+                    do_indent(buf, pretty, indent);
+                    buf.push('}');
+                    print_end_field(buf, pretty);
+                }
+            }
+        }
+    }
+
+    // TODO: unknown fields
+}
+
+/// Appends the compact (non-pretty) text-format rendering of `m` to `buf`.
+pub fn print_to(m: &Message, buf: &mut String) {
+    let (pretty, indent) = (false, 0);
+    print_to_internal(m, buf, pretty, indent)
+}
+
+/// Renders `m` in text format into a new `String`, starting at indentation
+/// level zero.
+///
+/// `pretty` selects multi-line, indented output instead of the compact form.
+fn print_to_string_internal(m: &Message, pretty: bool) -> String {
+    let mut r = String::new();
+    print_to_internal(m, &mut r, pretty, 0);
+    // `r` is already an owned `String`; returning it avoids a second
+    // allocation and copy.
+    r
+}
+
+/// Returns the compact (non-pretty) text-format rendering of `m`.
+pub fn print_to_string(m: &Message) -> String {
+    let pretty = false;
+    print_to_string_internal(m, pretty)
+}
+
+/// Writes the text-format rendering of `m` to a `fmt::Formatter`.
+///
+/// The formatter's alternate flag (`{:#}`) selects pretty output; otherwise
+/// the compact form is written.
+pub fn fmt(m: &Message, f: &mut fmt::Formatter) -> fmt::Result {
+    let rendered = print_to_string_internal(m, f.alternate());
+    f.write_str(&rendered)
+}
+
+// Unit tests for the byte escaping and unescaping helpers.
+#[cfg(test)]
+mod test {
+
+    // Escapes `data` without the surrounding quotes.
+    fn escape(data: &[u8]) -> String {
+        // The longest escape for a single byte is four characters (`\ooo`).
+        let mut s = String::with_capacity(data.len() * 4);
+        super::quote_bytes_to(data, &mut s);
+        s
+    }
+
+    // Asserts that `escaped` decodes to `text` and that `text` encodes back
+    // to exactly `escaped`.
+    fn test_escape_unescape(text: &str, escaped: &str) {
+        assert_eq!(text.as_bytes(), &super::unescape_string(escaped)[..]);
+        assert_eq!(escaped, &escape(text.as_bytes())[..]);
+    }
+
+    #[test]
+    fn test_print_to_bytes() {
+        assert_eq!("ab", escape(b"ab"));
+        assert_eq!("a\\\\023", escape(b"a\\023"));
+        assert_eq!("a\\r\\n\\t '\\\"\\\\", escape(b"a\r\n\t '\"\\"));
+        assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes()));
+    }
+
+    #[test]
+    fn test_unescape_string() {
+        test_escape_unescape("", "");
+        test_escape_unescape("aa", "aa");
+        test_escape_unescape("\n", "\\n");
+        test_escape_unescape("\r", "\\r");
+        test_escape_unescape("\t", "\\t");
+        test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275");
+        // hex, with both upper-case and lower-case digits
+        assert_eq!(b"aaa\x01bbb", &super::unescape_string("aaa\\x01bbb")[..]);
+        assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xCDbbb")[..]);
+        assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xcdbbb")[..]);
+        // quotes
+        assert_eq!(b"aaa\"bbb", &super::unescape_string("aaa\\\"bbb")[..]);
+        assert_eq!(b"aaa\'bbb", &super::unescape_string("aaa\\\'bbb")[..]);
+    }
+}