From 4152c49a9432c5e51e891f1c34b7e20446496aea Mon Sep 17 00:00:00 2001
From: Jeff Vander Stoep <jeffv@google.com>
Date: Thu, 9 Mar 2023 10:32:38 +0100
Subject: Import protobuf-support 3.2.0

Test: Treehugger
Bug: 270895633
Change-Id: I0e7e700aaa7eab95a91757ea147b08b205db2d0c
---
 .cargo_vcs_info.json         |   6 +
 Android.bp                   |  24 ++
 Cargo.toml                   |  35 +++
 Cargo.toml.orig              |  24 ++
 LICENSE                      |  19 ++
 LICENSE.txt                  |  19 ++
 METADATA                     |  19 ++
 MODULE_LICENSE_MIT           |   0
 OWNERS                       |   1 +
 README.md                    |   8 +
 cargo2android.json           |  10 +
 src/json_name.rs             |  19 ++
 src/lexer/float.rs           |  56 ++++
 src/lexer/int.rs             |  12 +
 src/lexer/json_number_lit.rs |  10 +
 src/lexer/lexer_impl.rs      | 712 +++++++++++++++++++++++++++++++++++++++++++
 src/lexer/loc.rs             |  28 ++
 src/lexer/mod.rs             |  12 +
 src/lexer/num_lit.rs         |   5 +
 src/lexer/parser_language.rs |  10 +
 src/lexer/str_lit.rs         |  77 +++++
 src/lexer/token.rs           |  47 +++
 src/lexer/tokenizer.rs       | 330 ++++++++++++++++++++
 src/lib.rs                   |   9 +
 src/text_format.rs           |  75 +++++
 src/toposort.rs              | 119 ++++++++
 26 files changed, 1686 insertions(+)
 create mode 100644 .cargo_vcs_info.json
 create mode 100644 Android.bp
 create mode 100644 Cargo.toml
 create mode 100644 Cargo.toml.orig
 create mode 100644 LICENSE
 create mode 100644 LICENSE.txt
 create mode 100644 METADATA
 create mode 100644 MODULE_LICENSE_MIT
 create mode 100644 OWNERS
 create mode 100644 README.md
 create mode 100644 cargo2android.json
 create mode 100644 src/json_name.rs
 create mode 100644 src/lexer/float.rs
 create mode 100644 src/lexer/int.rs
 create mode 100644 src/lexer/json_number_lit.rs
 create mode 100644 src/lexer/lexer_impl.rs
 create mode 100644 src/lexer/loc.rs
 create mode 100644 src/lexer/mod.rs
 create mode 100644 src/lexer/num_lit.rs
 create mode 100644 src/lexer/parser_language.rs
 create mode 100644 src/lexer/str_lit.rs
 create mode 100644 src/lexer/token.rs
 create mode 100644 src/lexer/tokenizer.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/text_format.rs
 create mode 100644 src/toposort.rs

diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..1981be8
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,6 @@
+{
+  "git": {
+    "sha1": "7155092f3df112159d55132081937e1fe5c30490"
+  },
+  "path_in_vcs": "protobuf-support"
+}
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..678d57a
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,24 @@
+// This file is generated by cargo2android.py --config cargo2android.json.
+// Do not modify this file as changes will be overridden on upgrade.
+
+
+
+rust_library {
+    name: "libprotobuf_support",
+    host_supported: true,
+    crate_name: "protobuf_support",
+    cargo_env_compat: true,
+    cargo_pkg_version: "3.2.0",
+    srcs: ["src/lib.rs"],
+    edition: "2021",
+    rustlibs: [
+        "libthiserror",
+    ],
+    apex_available: [
+        "//apex_available:platform",
+        "//apex_available:anyapex",
+    ],
+    product_available: true,
+    vendor_available: true,
+    min_sdk_version: "29",
+}
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..0f7368b
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,35 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "protobuf-support"
+version = "3.2.0"
+authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
+description = """
+Code supporting protobuf implementation. None of code in this crate is public API.
+"""
+homepage = "https://github.com/stepancheg/rust-protobuf/"
+documentation = "https://github.com/stepancheg/rust-protobuf/blob/master/README.md"
+readme = "README.md"
+license = "MIT"
+repository = "https://github.com/stepancheg/rust-protobuf/"
+
+[package.metadata.docs.rs]
+all-features = true
+
+[lib]
+bench = false
+
+[dependencies.thiserror]
+version = "1.0.30"
+
+[features]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..ee6fc99
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,24 @@
+[package]
+
+name = "protobuf-support"
+version = "3.2.0"
+authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
+edition = "2021"
+license = "MIT"
+homepage = "https://github.com/stepancheg/rust-protobuf/"
+repository = "https://github.com/stepancheg/rust-protobuf/"
+documentation = "https://github.com/stepancheg/rust-protobuf/blob/master/README.md"
+description = """
+Code supporting protobuf implementation. None of code in this crate is public API.
+"""
+
+[lib]
+bench = false
+
+[features]
+
+[dependencies]
+thiserror = "1.0.30"
+
+[package.metadata.docs.rs]
+all-features = true
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..acce639
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2019 Stepan Koltsov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..acce639
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2019 Stepan Koltsov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..517ef56
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "protobuf-support"
+description: "()"
+third_party {
+  url {
+    type: HOMEPAGE
+    value: "https://crates.io/crates/protobuf-support"
+  }
+  url {
+    type: ARCHIVE
+    value: "https://static.crates.io/crates/protobuf-support/protobuf-support-3.2.0.crate"
+  }
+  version: "3.2.0"
+  license_type: NOTICE
+  last_upgrade_date {
+    year: 2023
+    month: 2
+    day: 27
+  }
+}
diff --git a/MODULE_LICENSE_MIT b/MODULE_LICENSE_MIT
new file mode 100644
index 0000000..e69de29
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..45dc4dd
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:master:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..88bdebb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+<!-- cargo-sync-readme start -->
+
+# Supporting code for protobuf crates
+
+Code in this crate is used in protobuf crates like `protobuf` or `protobuf-parse`.
+None of code in this crate has public API.
+
+<!-- cargo-sync-readme end -->
diff --git a/cargo2android.json b/cargo2android.json
new file mode 100644
index 0000000..26ba70d
--- /dev/null
+++ b/cargo2android.json
@@ -0,0 +1,10 @@
+{
+  "apex-available": [
+    "//apex_available:platform",
+    "//apex_available:anyapex"
+  ],
+  "min-sdk-version": "29",
+  "dependencies": true,
+  "device": true,
+  "run": true
+}
diff --git a/src/json_name.rs b/src/json_name.rs
new file mode 100644
index 0000000..f5c9364
--- /dev/null
+++ b/src/json_name.rs
@@ -0,0 +1,19 @@
+/// Implementation must match exactly
+/// `ToJsonName()` function in C++ `descriptor.cc`.
+pub fn json_name(input: &str) -> String {
+    let mut capitalize_next = false;
+    let mut result = String::with_capacity(input.len());
+
+    for c in input.chars() {
+        if c == '_' {
+            capitalize_next = true;
+        } else if capitalize_next {
+            result.extend(c.to_uppercase());
+            capitalize_next = false;
+        } else {
+            result.push(c);
+        }
+    }
+
+    result
+}
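The function above mirrors C++ `ToJsonName()`: each underscore is dropped and the character that follows it is upper-cased. A minimal sketch of the resulting mapping, assuming the crate is consumed under the crate name `protobuf_support` from the Android.bp above (the caller below is illustrative, not part of this patch):

    use protobuf_support::json_name::json_name;

    fn main() {
        // Underscores are removed and the following character is capitalized.
        assert_eq!("fooBarBaz", json_name("foo_bar_baz"));
        // A leading underscore capitalizes the first character.
        assert_eq!("FooBar", json_name("_foo_bar"));
        // A trailing underscore is simply dropped.
        assert_eq!("foo", json_name("foo_"));
    }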
diff --git a/src/lexer/float.rs b/src/lexer/float.rs
new file mode 100644
index 0000000..f09c101
--- /dev/null
+++ b/src/lexer/float.rs
@@ -0,0 +1,56 @@
+#[derive(Debug)]
+pub enum ProtobufFloatParseError {
+    EmptyString,
+    CannotParseFloat,
+}
+
+pub type ProtobufFloatParseResult = Result<f64, ProtobufFloatParseError>;
+
+pub const PROTOBUF_NAN: &str = "nan";
+pub const PROTOBUF_INF: &str = "inf";
+
+/// Format float as in protobuf `.proto` files
+pub fn format_protobuf_float(f: f64) -> String {
+    if f.is_nan() {
+        PROTOBUF_NAN.to_owned()
+    } else if f.is_infinite() {
+        if f > 0.0 {
+            format!("{}", PROTOBUF_INF)
+        } else {
+            format!("-{}", PROTOBUF_INF)
+        }
+    } else {
+        // TODO: make sure doesn't lose precision
+        format!("{}", f)
+    }
+}
+
+/// Parse float from `.proto` format
+pub fn parse_protobuf_float(s: &str) -> ProtobufFloatParseResult {
+    if s.is_empty() {
+        return Err(ProtobufFloatParseError::EmptyString);
+    }
+    if s == PROTOBUF_NAN {
+        return Ok(f64::NAN);
+    }
+    if s == PROTOBUF_INF || s == format!("+{}", PROTOBUF_INF) {
+        return Ok(f64::INFINITY);
+    }
+    if s == format!("-{}", PROTOBUF_INF) {
+        return Ok(f64::NEG_INFINITY);
+    }
+    match s.parse() {
+        Ok(f) => Ok(f),
+        Err(_) => Err(ProtobufFloatParseError::CannotParseFloat),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_format_protobuf_float() {
+        assert_eq!("10", format_protobuf_float(10.0));
+    }
+}
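The round-trip behavior of the two functions above, as a sketch (again assuming the `protobuf_support` crate name; the literal values follow the rules in `parse_protobuf_float`):

    use protobuf_support::lexer::float::{format_protobuf_float, parse_protobuf_float};

    fn main() {
        // Special values use the protobuf spellings "inf" and "nan".
        assert_eq!("inf", format_protobuf_float(f64::INFINITY));
        assert_eq!("-inf", format_protobuf_float(f64::NEG_INFINITY));
        assert!(parse_protobuf_float("nan").unwrap().is_nan());
        // "+inf" is accepted on input but never produced on output.
        assert_eq!(f64::INFINITY, parse_protobuf_float("+inf").unwrap());
        // Everything else falls through to Rust's standard float parser.
        assert_eq!(12.5, parse_protobuf_float("12.5").unwrap());
    }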
diff --git a/src/lexer/int.rs b/src/lexer/int.rs
new file mode 100644
index 0000000..676c1ba
--- /dev/null
+++ b/src/lexer/int.rs
@@ -0,0 +1,12 @@
+pub struct Overflow;
+
+/// Negate `u64` checking for overflow.
+pub fn neg(value: u64) -> Result<i64, Overflow> {
+    if value <= 0x7fff_ffff_ffff_ffff {
+        Ok(-(value as i64))
+    } else if value == 0x8000_0000_0000_0000 {
+        Ok(-0x8000_0000_0000_0000)
+    } else {
+        Err(Overflow)
+    }
+}
diff --git a/src/lexer/json_number_lit.rs b/src/lexer/json_number_lit.rs
new file mode 100644
index 0000000..1323517
--- /dev/null
+++ b/src/lexer/json_number_lit.rs
@@ -0,0 +1,10 @@
+use std::fmt;
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct JsonNumberLit(pub String);
+
+impl fmt::Display for JsonNumberLit {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f)
+    }
+}
diff --git a/src/lexer/lexer_impl.rs b/src/lexer/lexer_impl.rs
new file mode 100644
index 0000000..4990c8f
--- /dev/null
+++ b/src/lexer/lexer_impl.rs
@@ -0,0 +1,712 @@
+use std::char;
+use std::convert::TryFrom;
+use std::num::ParseFloatError;
+use std::num::ParseIntError;
+
+use crate::lexer::float;
+use crate::lexer::float::ProtobufFloatParseError;
+use crate::lexer::json_number_lit::JsonNumberLit;
+use crate::lexer::loc::Loc;
+use crate::lexer::loc::FIRST_COL;
+use crate::lexer::parser_language::ParserLanguage;
+use crate::lexer::str_lit::StrLit;
+use crate::lexer::str_lit::StrLitDecodeError;
+use crate::lexer::token::Token;
+use crate::lexer::token::TokenWithLocation;
+
+#[derive(Debug, thiserror::Error)]
+pub enum LexerError {
+    // TODO: something better than this
+    #[error("Incorrect input")]
+    IncorrectInput,
+    #[error("Unexpected EOF")]
+    UnexpectedEof,
+    #[error("Expecting char: {:?}", .0)]
+    ExpectChar(char),
+    #[error("Parse int error")]
+    ParseIntError,
+    #[error("Parse float error")]
+    ParseFloatError,
+    // TODO: how it is different from ParseFloatError?
+    #[error("Incorrect float literal")]
+    IncorrectFloatLit,
+    #[error("Incorrect JSON escape")]
+    IncorrectJsonEscape,
+    #[error("Incorrect JSON number")]
+    IncorrectJsonNumber,
+    #[error("Incorrect Unicode character")]
+    IncorrectUnicodeChar,
+    #[error("Expecting hex digit")]
+    ExpectHexDigit,
+    #[error("Expecting oct digit")]
+    ExpectOctDigit,
+    #[error("Expecting dec digit")]
+    ExpectDecDigit,
+    #[error(transparent)]
+    StrLitDecodeError(#[from] StrLitDecodeError),
+    #[error("Expecting identifier")]
+    ExpectedIdent,
+}
+
+pub type LexerResult<T> = Result<T, LexerError>;
+
+impl From<ParseIntError> for LexerError {
+    fn from(_: ParseIntError) -> Self {
+        LexerError::ParseIntError
+    }
+}
+
+impl From<ParseFloatError> for LexerError {
+    fn from(_: ParseFloatError) -> Self {
+        LexerError::ParseFloatError
+    }
+}
+
+impl From<ProtobufFloatParseError> for LexerError {
+    fn from(_: ProtobufFloatParseError) -> Self {
+        LexerError::IncorrectFloatLit
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct Lexer<'a> {
+    language: ParserLanguage,
+    input: &'a str,
+    pos: usize,
+    pub loc: Loc,
+}
+
+fn is_letter(c: char) -> bool {
+    c.is_alphabetic() || c == '_'
+}
+
+impl<'a> Lexer<'a> {
+    pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> {
+        Lexer {
+            language,
+            input,
+            pos: 0,
+            loc: Loc::start(),
+        }
+    }
+
+    /// No more chars
+    pub fn eof(&self) -> bool {
+        self.pos == self.input.len()
+    }
+
+    /// Remaining chars
+    fn rem_chars(&self) -> &'a str {
+        &self.input[self.pos..]
+    }
+
+    pub fn lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool {
+        self.lookahead_char().map_or(false, p)
+    }
+
+    fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
+        self.lookahead_char_is(|c| alphabet.contains(c))
+    }
+
+    fn next_char_opt(&mut self) -> Option<char> {
+        let rem = self.rem_chars();
+        if rem.is_empty() {
+            None
+        } else {
+            let mut char_indices = rem.char_indices();
+            let (_, c) = char_indices.next().unwrap();
+            let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
+            self.pos += c_len;
+            if c == '\n' {
+                self.loc.line += 1;
+                self.loc.col = FIRST_COL;
+            } else {
+                self.loc.col += 1;
+            }
+            Some(c)
+        }
+    }
+
+    fn next_char(&mut self) -> LexerResult<char> {
+        self.next_char_opt().ok_or(LexerError::UnexpectedEof)
+    }
+
+    /// Skip whitespaces
+    fn skip_whitespaces(&mut self) {
+        self.take_while(|c| c.is_whitespace());
+    }
+
+    fn skip_c_comment(&mut self) -> LexerResult<()> {
+        if self.skip_if_lookahead_is_str("/*") {
+            let end = "*/";
+            match self.rem_chars().find(end) {
+                None => Err(LexerError::UnexpectedEof),
+                Some(len) => {
+                    let new_pos = self.pos + len + end.len();
+                    self.skip_to_pos(new_pos);
+                    Ok(())
+                }
+            }
+        } else {
+            Ok(())
+        }
+    }
+
+    fn skip_cpp_comment(&mut self) {
+        if self.skip_if_lookahead_is_str("//") {
+            loop {
+                match self.next_char_opt() {
+                    Some('\n') | None => break,
+                    _ => {}
+                }
+            }
+        }
+    }
+
+    fn skip_sh_comment(&mut self) {
+        if self.skip_if_lookahead_is_str("#") {
+            loop {
+                match self.next_char_opt() {
+                    Some('\n') | None => break,
+                    _ => {}
+                }
+            }
+        }
+    }
+
+    fn skip_comment(&mut self) -> LexerResult<()> {
+        match self.language {
+            ParserLanguage::Proto => {
+                self.skip_c_comment()?;
+                self.skip_cpp_comment();
+            }
+            ParserLanguage::TextFormat => {
+                self.skip_sh_comment();
+            }
+            ParserLanguage::Json => {}
+        }
+        Ok(())
+    }
+
+    pub fn skip_ws(&mut self) -> LexerResult<()> {
+        loop {
+            let pos = self.pos;
+            self.skip_whitespaces();
+            self.skip_comment()?;
+            if pos == self.pos {
+                // Did not advance
+                return Ok(());
+            }
+        }
+    }
+
+    pub fn take_while<F>(&mut self, f: F) -> &'a str
+    where
+        F: Fn(char) -> bool,
+    {
+        let start = self.pos;
+        while self.lookahead_char().map(&f) == Some(true) {
+            self.next_char_opt().unwrap();
+        }
+        let end = self.pos;
+        &self.input[start..end]
+    }
+
+    fn lookahead_char(&self) -> Option<char> {
+        self.clone().next_char_opt()
+    }
+
+    fn lookahead_is_str(&self, s: &str) -> bool {
+        self.rem_chars().starts_with(s)
+    }
+
+    fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
+        if self.lookahead_is_str(s) {
+            let new_pos = self.pos + s.len();
+            self.skip_to_pos(new_pos);
+            true
+        } else {
+            false
+        }
+    }
+
+    fn next_char_if<P>(&mut self, p: P) -> Option<char>
+    where
+        P: FnOnce(char) -> bool,
+    {
+        let mut clone = self.clone();
+        match clone.next_char_opt() {
+            Some(c) if p(c) => {
+                *self = clone;
+                Some(c)
+            }
+            _ => None,
+        }
+    }
+
+    pub fn next_char_if_eq(&mut self, expect: char) -> bool {
+        self.next_char_if(|c| c == expect) != None
+    }
+
+    fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
+        for c in alphabet.chars() {
+            if self.next_char_if_eq(c) {
+                return Some(c);
+            }
+        }
+        None
+    }
+
+    fn next_char_expect_eq(&mut self, expect: char) -> LexerResult<()> {
+        if self.next_char_if_eq(expect) {
+            Ok(())
+        } else {
+            Err(LexerError::ExpectChar(expect))
+        }
+    }
+
+    fn next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char>
+    where
+        P: FnOnce(char) -> bool,
+    {
+        self.next_char_if(expect).ok_or(err)
+    }
+
+    // str functions
+
+    /// properly update line and column
+    fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
+        assert!(new_pos >= self.pos);
+        assert!(new_pos <= self.input.len());
+        let pos = self.pos;
+        while self.pos != new_pos {
+            self.next_char_opt().unwrap();
+        }
+        &self.input[pos..new_pos]
+    }
+
+    // Protobuf grammar
+
+    // char functions
+
+    // letter = "A" … "Z" | "a" … "z"
+    // https://github.com/google/protobuf/issues/4565
+    fn next_letter_opt(&mut self) -> Option<char> {
+        self.next_char_if(is_letter)
+    }
+
+    // capitalLetter = "A" … "Z"
+    fn _next_capital_letter_opt(&mut self) -> Option<char> {
+        self.next_char_if(|c| c >= 'A' && c <= 'Z')
+    }
+
+    fn next_ident_part(&mut self) -> Option<char> {
+        self.next_char_if(|c| c.is_ascii_alphanumeric() || c == '_')
+    }
+
+    // Identifiers
+
+    // ident = letter { letter | decimalDigit | "_" }
+    fn next_ident_opt(&mut self) -> LexerResult<Option<String>> {
+        if let Some(c) = self.next_letter_opt() {
+            let mut ident = String::new();
+            ident.push(c);
+            while let Some(c) = self.next_ident_part() {
+                ident.push(c);
+            }
+            Ok(Some(ident))
+        } else {
+            Ok(None)
+        }
+    }
+
+    // Integer literals
+
+    // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit }
+    fn next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>> {
+        Ok(
+            if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
+                let s = self.take_while(|c| c.is_ascii_hexdigit());
+                Some(u64::from_str_radix(s, 16)? as u64)
+            } else {
+                None
+            },
+        )
+    }
+
+    // decimalLit = ( "1" … "9" ) { decimalDigit }
+    // octalLit = "0" { octalDigit }
+    fn next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>> {
+        // do not advance on number parse error
+        let mut clone = self.clone();
+
+        let pos = clone.pos;
+
+        Ok(if clone.next_char_if(|c| c.is_ascii_digit()) != None {
+            clone.take_while(|c| c.is_ascii_digit());
+            let value = clone.input[pos..clone.pos].parse()?;
+            *self = clone;
+            Some(value)
+        } else {
+            None
+        })
+    }
+
+    // hexDigit = "0" … "9" | "A" … "F" | "a" … "f"
+    fn next_hex_digit(&mut self) -> LexerResult<u32> {
+        let mut clone = self.clone();
+        let r = match clone.next_char()? {
+            c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
+            c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
+            c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
+            _ => return Err(LexerError::ExpectHexDigit),
+        };
+        *self = clone;
+        Ok(r)
+    }
+
+    // octalDigit = "0" … "7"
+    fn next_octal_digit(&mut self) -> LexerResult<u32> {
+        self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectOctDigit)
+            .map(|c| c as u32 - '0' as u32)
+    }
+
+    // decimalDigit = "0" … "9"
+    fn next_decimal_digit(&mut self) -> LexerResult<u32> {
+        self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectDecDigit)
+            .map(|c| c as u32 - '0' as u32)
+    }
+
+    // decimals = decimalDigit { decimalDigit }
+    fn next_decimal_digits(&mut self) -> LexerResult<()> {
+        self.next_decimal_digit()?;
+        self.take_while(|c| c >= '0' && c <= '9');
+        Ok(())
+    }
+
+    // intLit = decimalLit | octalLit | hexLit
+    pub fn next_int_lit_opt(&mut self) -> LexerResult<Option<u64>> {
+        assert_ne!(ParserLanguage::Json, self.language);
+
+        self.skip_ws()?;
+        if let Some(i) = self.next_hex_lit_opt()? {
+            return Ok(Some(i));
+        }
+        if let Some(i) = self.next_decimal_octal_lit_opt()? {
+            return Ok(Some(i));
+        }
+        Ok(None)
+    }
+
+    // Floating-point literals
+
+    // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals
+    fn next_exponent_opt(&mut self) -> LexerResult<Option<()>> {
+        if self.next_char_if_in("eE") != None {
+            self.next_char_if_in("+-");
+            self.next_decimal_digits()?;
+            Ok(Some(()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan"
+    fn next_float_lit(&mut self) -> LexerResult<()> {
+        assert_ne!(ParserLanguage::Json, self.language);
+
+        // "inf" and "nan" are handled as part of ident
+        if self.next_char_if_eq('.') {
+            self.next_decimal_digits()?;
+            self.next_exponent_opt()?;
+        } else {
+            self.next_decimal_digits()?;
+            if self.next_char_if_eq('.') {
+                self.next_decimal_digits()?;
+                self.next_exponent_opt()?;
+            } else {
+                if self.next_exponent_opt()? == None {
+                    return Err(LexerError::IncorrectFloatLit);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    // String literals
+
+    // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
+    // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
+    // https://github.com/google/protobuf/issues/4560
+    // octEscape = '\' octalDigit octalDigit octalDigit
+    // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
+    // quote = "'" | '"'
+    pub fn next_byte_value(&mut self) -> LexerResult<u8> {
+        match self.next_char()? {
+            '\\' => {
+                match self.next_char()? {
+                    '\'' => Ok(b'\''),
+                    '"' => Ok(b'"'),
+                    '\\' => Ok(b'\\'),
+                    'a' => Ok(b'\x07'),
+                    'b' => Ok(b'\x08'),
+                    'f' => Ok(b'\x0c'),
+                    'n' => Ok(b'\n'),
+                    'r' => Ok(b'\r'),
+                    't' => Ok(b'\t'),
+                    'v' => Ok(b'\x0b'),
+                    'x' => {
+                        let d1 = self.next_hex_digit()? as u8;
+                        let d2 = self.next_hex_digit()? as u8;
+                        Ok(((d1 << 4) | d2) as u8)
+                    }
+                    d if d >= '0' && d <= '7' => {
+                        let mut r = d as u8 - b'0';
+                        for _ in 0..2 {
+                            match self.next_octal_digit() {
+                                Err(_) => break,
+                                Ok(d) => r = (r << 3) + d as u8,
+                            }
+                        }
+                        Ok(r)
+                    }
+                    // https://github.com/google/protobuf/issues/4562
+                    // TODO: overflow
+                    c => Ok(c as u8),
+                }
+            }
+            '\n' | '\0' => Err(LexerError::IncorrectInput),
+            // TODO: check overflow
+            c => Ok(c as u8),
+        }
+    }
+
+    fn char_try_from(i: u32) -> LexerResult<char> {
+        char::try_from(i).map_err(|_| LexerError::IncorrectUnicodeChar)
+    }
+
+    pub fn next_json_char_value(&mut self) -> LexerResult<char> {
+        match self.next_char()? {
+            '\\' => match self.next_char()? {
+                '"' => Ok('"'),
+                '\'' => Ok('\''),
+                '\\' => Ok('\\'),
+                '/' => Ok('/'),
+                'b' => Ok('\x08'),
+                'f' => Ok('\x0c'),
+                'n' => Ok('\n'),
+                'r' => Ok('\r'),
+                't' => Ok('\t'),
+                'u' => {
+                    let mut v = 0;
+                    for _ in 0..4 {
+                        let digit = self.next_hex_digit()?;
+                        v = v * 16 + digit;
+                    }
+                    Self::char_try_from(v)
+                }
+                _ => Err(LexerError::IncorrectJsonEscape),
+            },
+            c => Ok(c),
+        }
+    }
+
+    // https://github.com/google/protobuf/issues/4564
+    // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
+    fn next_str_lit_raw(&mut self) -> LexerResult<String> {
+        let mut raw = String::new();
+
+        let mut first = true;
+        loop {
+            if !first {
+                self.skip_ws()?;
+            }
+
+            let start = self.pos;
+
+            let q = match self.next_char_if_in("'\"") {
+                Some(q) => q,
+                None if !first => break,
+                None => return Err(LexerError::IncorrectInput),
+            };
+            first = false;
+            while self.lookahead_char() != Some(q) {
+                self.next_byte_value()?;
+            }
+            self.next_char_expect_eq(q)?;
+
+            raw.push_str(&self.input[start + 1..self.pos - 1]);
+        }
+        Ok(raw)
+    }
+
+    fn next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>> {
+        if self.lookahead_char_is_in("'\"") {
+            Ok(Some(self.next_str_lit_raw()?))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Parse next token as JSON number
+    fn next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>> {
+        assert_eq!(ParserLanguage::Json, self.language);
+
+        fn is_digit(c: char) -> bool {
+            c >= '0' && c <= '9'
+        }
+
+        fn is_digit_1_9(c: char) -> bool {
+            c >= '1' && c <= '9'
+        }
+
+        if !self.lookahead_char_is_in("-0123456789") {
+            return Ok(None);
+        }
+
+        let mut s = String::new();
+        if self.next_char_if_eq('-') {
+            s.push('-');
+        }
+
+        if self.next_char_if_eq('0') {
+            s.push('0');
+        } else {
+            s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?);
+            while let Some(c) = self.next_char_if(is_digit) {
+                s.push(c);
+            }
+        }
+
+        if self.next_char_if_eq('.') {
+            s.push('.');
+            s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
+            while let Some(c) = self.next_char_if(is_digit) {
+                s.push(c);
+            }
+        }
+
+        if let Some(c) = self.next_char_if_in("eE") {
+            s.push(c);
+            if let Some(c) = self.next_char_if_in("+-") {
+                s.push(c);
+            }
+            s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
+            while let Some(c) = self.next_char_if(is_digit) {
+                s.push(c);
+            }
+        }
+
+        Ok(Some(JsonNumberLit(s)))
+    }
+
+    fn next_token_inner(&mut self) -> LexerResult<Token> {
+        if self.language == ParserLanguage::Json {
+            if let Some(v) = self.next_json_number_opt()? {
+                return Ok(Token::JsonNumber(v));
+            }
+        }
+
+        if let Some(ident) = self.next_ident_opt()? {
+            let token = if self.language != ParserLanguage::Json && ident == float::PROTOBUF_NAN {
+                Token::FloatLit(f64::NAN)
+            } else if self.language != ParserLanguage::Json && ident == float::PROTOBUF_INF {
+                Token::FloatLit(f64::INFINITY)
+            } else {
+                Token::Ident(ident.to_owned())
+            };
+            return Ok(token);
+        }
+
+        if self.language != ParserLanguage::Json {
+            let mut clone = self.clone();
+            let pos = clone.pos;
+            if let Ok(_) = clone.next_float_lit() {
+                let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?;
+                *self = clone;
+                return Ok(Token::FloatLit(f));
+            }
+
+            if let Some(lit) = self.next_int_lit_opt()? {
+                return Ok(Token::IntLit(lit));
+            }
+        }
+
+        if let Some(escaped) = self.next_str_lit_raw_opt()? {
+            return Ok(Token::StrLit(StrLit { escaped }));
+        }
+
+        // This branch must be after str lit
+        if let Some(c) = self.next_char_if(|c| c.is_ascii_punctuation()) {
+            return Ok(Token::Symbol(c));
+        }
+
+        if let Some(ident) = self.next_ident_opt()? {
+            return Ok(Token::Ident(ident));
+        }
+
+        Err(LexerError::IncorrectInput)
+    }
+
+    pub fn next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
+        self.skip_ws()?;
+        let loc = self.loc;
+
+        Ok(if self.eof() {
+            None
+        } else {
+            let token = self.next_token_inner()?;
+            // Skip whitespace here to update location
+            // to the beginning of the next token
+            self.skip_ws()?;
+            Some(TokenWithLocation { token, loc })
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    fn lex<P, R>(input: &str, parse_what: P) -> R
+    where
+        P: FnOnce(&mut Lexer) -> LexerResult<R>,
+    {
+        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
+        let r = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
+        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
+        r
+    }
+
+    fn lex_opt<P, R>(input: &str, parse_what: P) -> R
+    where
+        P: FnOnce(&mut Lexer) -> LexerResult<Option<R>>,
+    {
+        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
+        let o = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
+        let r = o.expect(&format!("lexer returned none at {}", lexer.loc));
+        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
+        r
+    }
+
+    #[test]
+    fn test_lexer_int_lit() {
+        let msg = r#"10"#;
+        let mess = lex_opt(msg, |p| p.next_int_lit_opt());
+        assert_eq!(10, mess);
+    }
+
+    #[test]
+    fn test_lexer_float_lit() {
+        let msg = r#"12.3"#;
+        let mess = lex(msg, |p| p.next_token_inner());
+        assert_eq!(Token::FloatLit(12.3), mess);
+    }
+
+    #[test]
+    fn test_lexer_float_lit_leading_zeros_in_exp() {
+        let msg = r#"1e00009"#;
+        let mess = lex(msg, |p| p.next_token_inner());
+        assert_eq!(Token::FloatLit(1_000_000_000.0), mess);
+    }
+}
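Putting the lexer together: a sketch that drains `next_token` on a `.proto`-style input (the caller is hypothetical; `Lexer`, `ParserLanguage`, and `TokenWithLocation` are the public items defined in this patch):

    use protobuf_support::lexer::lexer_impl::Lexer;
    use protobuf_support::lexer::parser_language::ParserLanguage;

    fn main() {
        let mut lexer = Lexer::new("foo = 42; // trailing comment", ParserLanguage::Proto);
        // `next_token` skips whitespace and comments and yields
        // `Ok(None)` at end of input.
        while let Some(t) = lexer.next_token().expect("lex error") {
            println!("{} at {}", t.token.format(), t.loc);
        }
        // Prints: `foo at 1:1`, `= at 1:5`, `42 at 1:7`, `; at 1:9`.
    }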
diff --git a/src/lexer/loc.rs b/src/lexer/loc.rs
new file mode 100644
index 0000000..ea3fc1a
--- /dev/null
+++ b/src/lexer/loc.rs
@@ -0,0 +1,28 @@
+use std::fmt;
+
+pub const FIRST_LINE: u32 = 1;
+pub const FIRST_COL: u32 = 1;
+
+/// Location in file
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct Loc {
+    /// 1-based
+    pub line: u32,
+    /// 1-based
+    pub col: u32,
+}
+
+impl fmt::Display for Loc {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}:{}", self.line, self.col)
+    }
+}
+
+impl Loc {
+    pub fn start() -> Loc {
+        Loc {
+            line: FIRST_LINE,
+            col: FIRST_COL,
+        }
+    }
+}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 0000000..bde64f5
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,12 @@
+//! Implementation of lexer for both protobuf parser and for text format parser.
+
+pub mod float;
+pub mod int;
+pub mod json_number_lit;
+pub mod lexer_impl;
+pub mod loc;
+pub mod num_lit;
+pub mod parser_language;
+pub mod str_lit;
+pub mod token;
+pub mod tokenizer;
diff --git a/src/lexer/num_lit.rs b/src/lexer/num_lit.rs
new file mode 100644
index 0000000..cc64cc4
--- /dev/null
+++ b/src/lexer/num_lit.rs
@@ -0,0 +1,5 @@
+#[derive(Copy, Clone)]
+pub enum NumLit {
+    U64(u64),
+    F64(f64),
+}
diff --git a/src/lexer/parser_language.rs b/src/lexer/parser_language.rs
new file mode 100644
index 0000000..e356571
--- /dev/null
+++ b/src/lexer/parser_language.rs
@@ -0,0 +1,10 @@
+/// We use the same lexer/tokenizer for all parsers for simplicity
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum ParserLanguage {
+    // `.proto` files
+    Proto,
+    // Protobuf text format
+    TextFormat,
+    // JSON
+    Json,
+}
diff --git a/src/lexer/str_lit.rs b/src/lexer/str_lit.rs
new file mode 100644
index 0000000..0e51a16
--- /dev/null
+++ b/src/lexer/str_lit.rs
@@ -0,0 +1,77 @@
+use std::fmt;
+use std::string::FromUtf8Error;
+
+use crate::lexer::lexer_impl::Lexer;
+use crate::lexer::parser_language::ParserLanguage;
+
+#[derive(Debug, thiserror::Error)]
+pub enum StrLitDecodeError {
+    #[error(transparent)]
+    FromUtf8Error(#[from] FromUtf8Error),
+    #[error("String literal decode error")]
+    OtherError,
+}
+
+pub type StrLitDecodeResult<T> = Result<T, StrLitDecodeError>;
+
+/// String literal, both `string` and `bytes`.
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub struct StrLit {
+    pub escaped: String,
+}
+
+impl fmt::Display for StrLit {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "\"{}\"", &self.escaped)
+    }
+}
+
+impl StrLit {
+    /// May fail if not valid UTF8
+    pub fn decode_utf8(&self) -> StrLitDecodeResult<String> {
+        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
+        let mut r = Vec::new();
+        while !lexer.eof() {
+            r.push(
+                lexer
+                    .next_byte_value()
+                    .map_err(|_| StrLitDecodeError::OtherError)?,
+            );
+        }
+        Ok(String::from_utf8(r)?)
+    }
+
+    pub fn decode_bytes(&self) -> StrLitDecodeResult<Vec<u8>> {
+        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
+        let mut r = Vec::new();
+        while !lexer.eof() {
+            r.push(
+                lexer
+                    .next_byte_value()
+                    .map_err(|_| StrLitDecodeError::OtherError)?,
+            );
+        }
+        Ok(r)
+    }
+
+    pub fn quoted(&self) -> String {
+        format!("\"{}\"", self.escaped)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::lexer::str_lit::StrLit;
+
+    #[test]
+    fn decode_utf8() {
+        assert_eq!(
+            "\u{1234}".to_owned(),
+            StrLit {
+                escaped: "\\341\\210\\264".to_owned()
+            }
+            .decode_utf8()
+            .unwrap()
+        )
+    }
+}
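A `StrLit` stores the still-escaped source text; decoding is deferred until `decode_utf8` or `decode_bytes` is called, so a bad escape only surfaces then. A small sketch of the octal-escape path (illustrative values):

    use protobuf_support::lexer::str_lit::StrLit;

    fn main() {
        // "\303\251" are the octal escapes for the two UTF-8 bytes of U+00E9.
        let lit = StrLit {
            escaped: "caf\\303\\251".to_owned(),
        };
        assert_eq!("café", lit.decode_utf8().unwrap());
        assert_eq!(b"caf\xc3\xa9".to_vec(), lit.decode_bytes().unwrap());
    }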
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
new file mode 100644
index 0000000..b20aba6
--- /dev/null
+++ b/src/lexer/token.rs
@@ -0,0 +1,47 @@
+use crate::lexer::json_number_lit::JsonNumberLit;
+use crate::lexer::lexer_impl::LexerError;
+use crate::lexer::lexer_impl::LexerResult;
+use crate::lexer::loc::Loc;
+use crate::lexer::num_lit::NumLit;
+use crate::lexer::str_lit::StrLit;
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum Token {
+    Ident(String),
+    Symbol(char),
+    // Protobuf tokenizer has separate tokens for int and float.
+    // Tokens do not include sign.
+    IntLit(u64),
+    FloatLit(f64),
+    JsonNumber(JsonNumberLit),
+    // including quotes
+    StrLit(StrLit),
+}
+
+impl Token {
+    /// Back to original
+    pub fn format(&self) -> String {
+        match self {
+            &Token::Ident(ref s) => s.clone(),
+            &Token::Symbol(c) => c.to_string(),
+            &Token::IntLit(ref i) => i.to_string(),
+            &Token::StrLit(ref s) => s.quoted(),
+            &Token::FloatLit(ref f) => f.to_string(),
+            &Token::JsonNumber(ref f) => f.to_string(),
+        }
+    }
+
+    pub fn to_num_lit(&self) -> LexerResult<NumLit> {
+        match self {
+            &Token::IntLit(i) => Ok(NumLit::U64(i)),
+            &Token::FloatLit(f) => Ok(NumLit::F64(f)),
+            _ => Err(LexerError::IncorrectInput),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct TokenWithLocation {
+    pub token: Token,
+    pub loc: Loc,
+}
diff --git a/src/lexer/tokenizer.rs b/src/lexer/tokenizer.rs
new file mode 100644
index 0000000..c5e84a0
--- /dev/null
+++ b/src/lexer/tokenizer.rs
@@ -0,0 +1,330 @@
+use crate::lexer::lexer_impl::Lexer;
+use crate::lexer::lexer_impl::LexerError;
+use crate::lexer::loc::Loc;
+use crate::lexer::parser_language::ParserLanguage;
+use crate::lexer::str_lit::StrLit;
+use crate::lexer::str_lit::StrLitDecodeError;
+use crate::lexer::token::Token;
+use crate::lexer::token::TokenWithLocation;
+
+#[derive(Debug, thiserror::Error)]
+pub enum TokenizerError {
+    #[error(transparent)]
+    LexerError(#[from] LexerError),
+    #[error(transparent)]
+    StrLitDecodeError(#[from] StrLitDecodeError),
+    #[error("Internal tokenizer error")]
+    InternalError,
+    // TODO: too broad
+    #[error("Incorrect input")]
+    IncorrectInput,
+    #[error("Not allowed in this context: {0}")]
+    NotAllowedInThisContext(&'static str),
+    #[error("Unexpected end of input")]
+    UnexpectedEof,
+    #[error("Expecting string literal")]
+    ExpectStrLit,
+    #[error("Expecting int literal")]
+    ExpectIntLit,
+    #[error("Expecting float literal")]
+    ExpectFloatLit,
+    #[error("Expecting identifier")]
+    ExpectIdent,
+    #[error("Expecting identifier `{}`", .0)]
+    ExpectNamedIdent(String),
+    #[error("While parsing {}, expecting char `{}`", .1, .0)]
+    ExpectChar(char, &'static str),
+    #[error("Expecting any char of: {}", .0.iter().map(|c| format!("`{}`", c)).collect::<Vec<_>>().join(", "))]
+    ExpectAnyChar(Vec<char>),
+}
+
+pub type TokenizerResult<R> = Result<R, TokenizerError>;
+
+#[derive(Clone)]
+pub struct Tokenizer<'a> {
+    lexer: Lexer<'a>,
+    next_token: Option<TokenWithLocation>,
+    last_token_loc: Option<Loc>,
+}
+
+impl<'a> Tokenizer<'a> {
+    pub fn new(input: &'a str, comment_style: ParserLanguage) -> Tokenizer<'a> {
+        Tokenizer {
+            lexer: Lexer::new(input, comment_style),
+            next_token: None,
+            last_token_loc: None,
+        }
+    }
+
+    pub fn loc(&self) -> Loc {
+        // After lookahead return the location of the next token
+        self.next_token
+            .as_ref()
+            .map(|t| t.loc.clone())
+            // After token consumed return the location of that token
+            .or(self.last_token_loc.clone())
+            // Otherwise return the position of lexer
+            .unwrap_or(self.lexer.loc)
+    }
+
+    pub fn lookahead_loc(&mut self) -> Loc {
+        drop(self.lookahead());
+        // TODO: does not handle EOF properly
+        self.loc()
+    }
+
+    fn lookahead(&mut self) -> TokenizerResult<Option<&Token>> {
+        Ok(match self.next_token {
+            Some(ref token) => Some(&token.token),
+            None => {
+                self.next_token = self.lexer.next_token()?;
+                self.last_token_loc = self.next_token.as_ref().map(|t| t.loc.clone());
+                match self.next_token {
+                    Some(ref token) => Some(&token.token),
+                    None => None,
+                }
+            }
+        })
+    }
+
+    pub fn lookahead_some(&mut self) -> TokenizerResult<&Token> {
+        match self.lookahead()? {
+            Some(token) => Ok(token),
+            None => Err(TokenizerError::UnexpectedEof),
+        }
+    }
+
+    fn next(&mut self) -> TokenizerResult<Option<Token>> {
+        self.lookahead()?;
+        Ok(self
+            .next_token
+            .take()
+            .map(|TokenWithLocation { token, .. }| token))
+    }
+
+    pub fn next_some(&mut self) -> TokenizerResult<Token> {
+        match self.next()? {
+            Some(token) => Ok(token),
+            None => Err(TokenizerError::UnexpectedEof),
+        }
+    }
+
+    /// Can be called only after lookahead, otherwise it's error
+    pub fn advance(&mut self) -> TokenizerResult<Token> {
+        self.next_token
+            .take()
+            .map(|TokenWithLocation { token, .. }| token)
+            .ok_or(TokenizerError::InternalError)
+    }
+
+    /// No more tokens
+    pub fn syntax_eof(&mut self) -> TokenizerResult<bool> {
+        Ok(self.lookahead()?.is_none())
+    }
+
+    pub fn next_token_if_map<P, R>(&mut self, p: P) -> TokenizerResult<Option<R>>
+    where
+        P: FnOnce(&Token) -> Option<R>,
+    {
+        self.lookahead()?;
+        let v = match self.next_token {
+            Some(ref token) => match p(&token.token) {
+                Some(v) => v,
+                None => return Ok(None),
+            },
+            _ => return Ok(None),
+        };
+        self.next_token = None;
+        Ok(Some(v))
+    }
+
+    pub fn next_token_check_map<P, R, E>(&mut self, p: P) -> Result<R, E>
+    where
+        P: FnOnce(&Token) -> Result<R, E>,
+        E: From<TokenizerError>,
+    {
+        self.lookahead()?;
+        let r = match self.next_token {
+            Some(ref token) => p(&token.token)?,
+            None => return Err(TokenizerError::UnexpectedEof.into()),
+        };
+        self.next_token = None;
+        Ok(r)
+    }
+
+    fn next_token_if<P>(&mut self, p: P) -> TokenizerResult<Option<Token>>
+    where
+        P: FnOnce(&Token) -> bool,
+    {
+        self.next_token_if_map(|token| if p(token) { Some(token.clone()) } else { None })
+    }
+
+    pub fn next_ident_if_in(&mut self, idents: &[&str]) -> TokenizerResult<Option<String>> {
+        let v = match self.lookahead()? {
+            Some(&Token::Ident(ref next)) => {
+                if idents.into_iter().find(|&i| i == next).is_some() {
+                    next.clone()
+                } else {
+                    return Ok(None);
+                }
+            }
+            _ => return Ok(None),
+        };
+        self.advance()?;
+        Ok(Some(v))
+    }
+
+    pub fn next_ident_if_eq(&mut self, word: &str) -> TokenizerResult<bool> {
+        Ok(self.next_ident_if_in(&[word])? != None)
+    }
+
+    pub fn next_ident_expect_eq(&mut self, word: &str) -> TokenizerResult<()> {
+        if self.next_ident_if_eq(word)? {
+            Ok(())
+        } else {
+            Err(TokenizerError::ExpectNamedIdent(word.to_owned()))
+        }
+    }
+
+    pub fn next_ident_if_eq_error(&mut self, word: &'static str) -> TokenizerResult<()> {
+        if self.clone().next_ident_if_eq(word)? {
+            // TODO: which context?
+            return Err(TokenizerError::NotAllowedInThisContext(word));
+        }
+        Ok(())
+    }
+
+    pub fn next_symbol_if_eq(&mut self, symbol: char) -> TokenizerResult<bool> {
+        Ok(self.next_token_if(|token| match token {
+            &Token::Symbol(c) if c == symbol => true,
+            _ => false,
+        })? != None)
+    }
+
+    pub fn next_symbol_expect_eq(
+        &mut self,
+        symbol: char,
+        desc: &'static str,
+    ) -> TokenizerResult<()> {
+        if self.lookahead_is_symbol(symbol)? {
+            self.advance()?;
+            Ok(())
+        } else {
+            Err(TokenizerError::ExpectChar(symbol, desc))
+        }
+    }
+
+    pub fn next_symbol_expect_eq_oneof(&mut self, symbols: &[char]) -> TokenizerResult<char> {
+        for symbol in symbols {
+            if let Ok(()) = self.next_symbol_expect_eq(*symbol, "ignored") {
+                return Ok(*symbol);
+            }
+        }
+        Err(TokenizerError::ExpectAnyChar(symbols.to_owned()))
+    }
+
+    pub fn lookahead_is_str_lit(&mut self) -> TokenizerResult<bool> {
+        Ok(match self.lookahead()? {
+            Some(&Token::StrLit(..)) => true,
+            _ => false,
+        })
+    }
+
+    pub fn lookahead_is_int_lit(&mut self) -> TokenizerResult<bool> {
+        Ok(match self.lookahead()? {
+            Some(&Token::IntLit(..)) => true,
+            _ => false,
+        })
+    }
+
+    pub fn lookahead_is_json_number(&mut self) -> TokenizerResult<bool> {
+        Ok(match self.lookahead()? {
+            Some(&Token::JsonNumber(..)) => true,
+            _ => false,
+        })
+    }
+
+    pub fn lookahead_if_symbol(&mut self) -> TokenizerResult<Option<char>> {
+        Ok(match self.lookahead()? {
+            Some(&Token::Symbol(c)) => Some(c),
+            _ => None,
+        })
+    }
+
+    pub fn lookahead_is_symbol(&mut self, symbol: char) -> TokenizerResult<bool> {
+        Ok(self.lookahead_if_symbol()? == Some(symbol))
+    }
+
+    pub fn lookahead_is_ident(&mut self, ident: &str) -> TokenizerResult<bool> {
+        Ok(match self.lookahead()? {
+            Some(Token::Ident(i)) => i == ident,
+            _ => false,
+        })
+    }
+
+    pub fn next_ident(&mut self) -> TokenizerResult<String> {
+        self.next_token_check_map(|token| match token {
+            &Token::Ident(ref ident) => Ok(ident.clone()),
+            _ => Err(TokenizerError::ExpectIdent),
+        })
+    }
+
+    pub fn next_str_lit(&mut self) -> TokenizerResult<StrLit> {
+        self.next_token_check_map(|token| match token {
+            &Token::StrLit(ref str_lit) => Ok(str_lit.clone()),
+            _ => Err(TokenizerError::ExpectStrLit),
+        })
+    }
+
+    pub fn next_int_lit(&mut self) -> TokenizerResult<u64> {
+        self.next_token_check_map(|token| match token {
+            &Token::IntLit(v) => Ok(v),
+            _ => Err(TokenizerError::ExpectIntLit),
+        })
+    }
+
+    pub fn next_float_lit(&mut self) -> TokenizerResult<f64> {
+        self.next_token_check_map(|token| match token {
+            &Token::FloatLit(v) => Ok(v),
+            _ => Err(TokenizerError::ExpectFloatLit),
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use super::*;
+
+    fn tokenize<P, R>(input: &str, what: P) -> R
+    where
+        P: FnOnce(&mut Tokenizer) -> TokenizerResult<R>,
+    {
+        let mut tokenizer = Tokenizer::new(input, ParserLanguage::Proto);
+        let r = what(&mut tokenizer).expect(&format!("parse failed at {}", tokenizer.loc()));
+        let eof = tokenizer
+            .syntax_eof()
+            .expect(&format!("check eof failed at {}", tokenizer.loc()));
+        assert!(eof, "{}", tokenizer.loc());
+        r
+    }
+
+    #[test]
+    fn test_ident() {
+        let msg = r#" aabb_c "#;
+        let mess = tokenize(msg, |p| p.next_ident().map(|s| s.to_owned()));
+        assert_eq!("aabb_c", mess);
+    }
+
+    #[test]
+    fn test_str_lit() {
+        let msg = r#" "a\nb" "#;
+        let mess = tokenize(msg, |p| p.next_str_lit());
+        assert_eq!(
+            StrLit {
+                escaped: r#"a\nb"#.to_owned()
+            },
+            mess
+        );
+    }
+}
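The tokenizer layers one-token lookahead and typed accessors over the lexer. A sketch of parsing a tiny statement with the methods above (the input is hypothetical, not from the test suite):

    use protobuf_support::lexer::parser_language::ParserLanguage;
    use protobuf_support::lexer::tokenizer::Tokenizer;

    fn main() {
        let mut t = Tokenizer::new("option name = 17;", ParserLanguage::Proto);
        t.next_ident_expect_eq("option").unwrap();
        let name = t.next_ident().unwrap();
        t.next_symbol_expect_eq('=', "option").unwrap();
        let value = t.next_int_lit().unwrap();
        t.next_symbol_expect_eq(';', "option").unwrap();
        assert!(t.syntax_eof().unwrap());
        println!("{} = {}", name, value); // name = 17
    }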
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..63c2a8d
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,9 @@
+//! # Supporting code for protobuf crates
+//!
+//! Code in this crate is used in protobuf crates like `protobuf` or `protobuf-parse`.
+//! None of code in this crate has public API.
+
+pub mod json_name;
+pub mod lexer;
+pub mod text_format;
+pub mod toposort;
diff --git a/src/text_format.rs b/src/text_format.rs
new file mode 100644
index 0000000..f49f4c2
--- /dev/null
+++ b/src/text_format.rs
@@ -0,0 +1,75 @@
+pub fn escape_bytes_to(bytes: &[u8], buf: &mut String) {
+    for &c in bytes {
+        match c {
+            b'\n' => buf.push_str(r"\n"),
+            b'\r' => buf.push_str(r"\r"),
+            b'\t' => buf.push_str(r"\t"),
+            b'\'' => buf.push_str("\\\'"),
+            b'"' => buf.push_str("\\\""),
+            b'\\' => buf.push_str(r"\\"),
+            b'\x20'..=b'\x7e' => buf.push(c as char),
+            _ => {
+                buf.push('\\');
+                buf.push((b'0' + (c >> 6)) as char);
+                buf.push((b'0' + ((c >> 3) & 7)) as char);
+                buf.push((b'0' + (c & 7)) as char);
+            }
+        }
+    }
+}
+
+pub fn quote_bytes_to(bytes: &[u8], buf: &mut String) {
+    buf.push('"');
+    escape_bytes_to(bytes, buf);
+    buf.push('"');
+}
+
+#[cfg(test)]
+mod test {
+    use crate::lexer::str_lit::StrLit;
+    use crate::text_format::escape_bytes_to;
+
+    fn escape(data: &[u8]) -> String {
+        let mut s = String::with_capacity(data.len() * 4);
+        escape_bytes_to(data, &mut s);
+        s
+    }
+
+    fn unescape_string(escaped: &str) -> Vec<u8> {
+        StrLit {
+            escaped: escaped.to_owned(),
+        }
+        .decode_bytes()
+        .expect("decode_bytes")
+    }
+
+    fn test_escape_unescape(text: &str, escaped: &str) {
+        assert_eq!(text.as_bytes(), &unescape_string(escaped)[..]);
+        assert_eq!(escaped, &escape(text.as_bytes())[..]);
+    }
+
+    #[test]
+    fn test_print_to_bytes() {
+        assert_eq!("ab", escape(b"ab"));
+        assert_eq!("a\\\\023", escape(b"a\\023"));
+        assert_eq!("a\\r\\n\\t \\'\\\"\\\\", escape(b"a\r\n\t '\"\\"));
+        assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes()));
+    }
+
+    #[test]
+    fn test_unescape_string() {
+        test_escape_unescape("", "");
+        test_escape_unescape("aa", "aa");
+        test_escape_unescape("\n", "\\n");
+        test_escape_unescape("\r", "\\r");
+        test_escape_unescape("\t", "\\t");
+        test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275");
+        // hex
+        assert_eq!(b"aaa\x01bbb", &unescape_string("aaa\\x01bbb")[..]);
+        assert_eq!(b"aaa\xcdbbb", &unescape_string("aaa\\xCDbbb")[..]);
+        assert_eq!(b"aaa\xcdbbb", &unescape_string("aaa\\xCDbbb")[..]);
+        // quotes
+        assert_eq!(b"aaa\"bbb", &unescape_string("aaa\\\"bbb")[..]);
+        assert_eq!(b"aaa\'bbb", &unescape_string("aaa\\\'bbb")[..]);
+    }
+}
diff --git a/src/toposort.rs b/src/toposort.rs
new file mode 100644
index 0000000..5e44590
--- /dev/null
+++ b/src/toposort.rs
@@ -0,0 +1,119 @@
+use std::collections::HashSet;
+use std::hash::Hash;
+
+#[derive(Debug, thiserror::Error)]
+#[error("Cycle detected")]
+pub struct TopoSortCycle;
+
+pub fn toposort<K, I>(
+    input: impl IntoIterator<Item = K>,
+    deps: impl Fn(&K) -> I,
+) -> Result<Vec<K>, TopoSortCycle>
+where
+    K: Eq + Hash + Clone,
+    I: Iterator<Item = K>,
+{
+    struct Ts<K, I, D>
+    where
+        K: Eq + Hash + Clone,
+        I: Iterator<Item = K>,
+        D: Fn(&K) -> I,
+    {
+        result_set: HashSet<K>,
+        result: Vec<K>,
+        deps: D,
+        stack: HashSet<K>,
+    }
+
+    impl<K, I, D> Ts<K, I, D>
+    where
+        K: Eq + Hash + Clone,
+        I: Iterator<Item = K>,
+        D: Fn(&K) -> I,
+    {
+        fn visit(&mut self, i: &K) -> Result<(), TopoSortCycle> {
+            if self.result_set.contains(i) {
+                return Ok(());
+            }
+
+            if !self.stack.insert(i.clone()) {
+                return Err(TopoSortCycle);
+            }
+            for dep in (self.deps)(i) {
+                self.visit(&dep)?;
+            }
+
+            let removed = self.stack.remove(i);
+            assert!(removed);
+
+            self.result.push(i.clone());
+            self.result_set.insert(i.clone());
+
+            Ok(())
+        }
+    }
+
+    let mut ts = Ts {
+        result: Vec::new(),
+        result_set: HashSet::new(),
+        deps,
+        stack: HashSet::new(),
+    };
+
+    for i in input {
+        ts.visit(&i)?;
+    }
+
+    Ok(ts.result)
+}
+
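A sketch of the ordering contract: each key's dependencies come out before the key itself, and a cycle is reported as `TopoSortCycle` (hypothetical caller, keys are plain `&str`s):

    use protobuf_support::toposort::toposort;

    fn main() {
        // "c" depends on "b", "b" depends on "a"; dependencies sort first.
        let sorted = toposort(vec!["c", "b", "a"], |k: &&str| match *k {
            "c" => vec!["b"].into_iter(),
            "b" => vec!["a"].into_iter(),
            _ => Vec::new().into_iter(),
        })
        .unwrap();
        assert_eq!(vec!["a", "b", "c"], sorted);
    }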
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use crate::toposort::toposort;
+    use crate::toposort::TopoSortCycle;
+
+    fn test_toposort(input: &str) -> Result<Vec<&str>, TopoSortCycle> {
+        let mut keys: Vec<&str> = Vec::new();
+        let mut edges: HashMap<&str, Vec<&str>> = HashMap::new();
+        for part in input.split(" ") {
+            match part.split_once("->") {
+                Some((k, vs)) => {
+                    keys.push(k);
+                    edges.insert(k, vs.split(",").collect());
+                }
+                None => keys.push(part),
+            };
+        }
+
+        toposort(keys, |k| {
+            edges
+                .get(k)
+                .map(|v| v.as_slice())
+                .unwrap_or_default()
+                .into_iter()
+                .copied()
+        })
+    }
+
+    fn test_toposort_check(input: &str, expected: &str) {
+        let sorted = test_toposort(input).unwrap();
+        let expected = expected.split(" ").collect::<Vec<_>>();
+        assert_eq!(expected, sorted);
+    }
+
+    #[test]
+    fn test() {
+        test_toposort_check("1 2 3", "1 2 3");
+        test_toposort_check("1->2 2->3 3", "3 2 1");
+        test_toposort_check("1 2->1 3->2", "1 2 3");
+        test_toposort_check("1->2,3 2->3 3", "3 2 1");
+    }
+
+    #[test]
+    fn cycle() {
+        assert!(test_toposort("1->1").is_err());
+        assert!(test_toposort("1->2 2->1").is_err());
+    }
+}
-- 
cgit v1.2.3