aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Mayle <fmayle@google.com>2022-10-27 16:29:18 -0700
committerFrederick Mayle <fmayle@google.com>2022-11-04 12:48:37 -0700
commit7bad3cec8fb7e1df84a77abfef5d699f82d6d22e (patch)
tree92570a0dccac867da09bc052b3a64070698f56dc
parent5f8013ba000d9ad23aac5f3121bde4f37a561fcb (diff)
downloadlitrs-7bad3cec8fb7e1df84a77abfef5d699f82d6d22e.tar.gz
Initial import of litrs-0.2.3
Bug: 255384162 Test: n/a Change-Id: I66ab38c4a632133836eb95d2786e6f305f2b46dd
-rw-r--r--.cargo_vcs_info.json5
-rw-r--r--.gitignore2
-rw-r--r--CHANGELOG.md59
-rw-r--r--Cargo.toml31
-rw-r--r--Cargo.toml.orig30
l---------LICENSE1
-rw-r--r--LICENSE-APACHE176
-rw-r--r--LICENSE-MIT25
-rw-r--r--METADATA20
-rw-r--r--MODULE_LICENSE_APACHE20
-rw-r--r--OWNERS2
-rw-r--r--README.md88
-rw-r--r--src/bool/mod.rs51
-rw-r--r--src/bool/tests.rs48
-rw-r--r--src/byte/mod.rs91
-rw-r--r--src/byte/tests.rs173
-rw-r--r--src/bytestr/mod.rs113
-rw-r--r--src/bytestr/tests.rs210
-rw-r--r--src/char/mod.rs88
-rw-r--r--src/char/tests.rs213
-rw-r--r--src/err.rs363
-rw-r--r--src/escape.rs255
-rw-r--r--src/float/mod.rs202
-rw-r--r--src/float/tests.rs205
-rw-r--r--src/impls.rs339
-rw-r--r--src/integer/mod.rs285
-rw-r--r--src/integer/tests.rs336
-rw-r--r--src/lib.rs288
-rw-r--r--src/parse.rs81
-rw-r--r--src/string/mod.rs110
-rw-r--r--src/string/tests.rs263
-rw-r--r--src/test_util.rs81
-rw-r--r--src/tests.rs351
33 files changed, 4585 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..b790a60
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+ "git": {
+ "sha1": "219f0ca73d15cc518e9d56952b4e296dc8c17636"
+ }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..9e6628a
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,59 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+
+## [Unreleased]
+
+## [0.2.3] - 2021-06-09
+### Changed
+- Minor internal code change to bring MSRV from 1.52 to 1.42
+
+## [0.2.2] - 2021-06-09
+### Changed
+- Fixed (byte) string literal parsing by:
+ - Correctly handling "string continue" sequences
+ - Correctly converting `\r\n` into `\n`
+
+## [0.2.1] - 2021-06-04
+### Changed
+- Fixed the `expected` value of the error returned from `TryFrom<TokenTree>` impls in some cases
+
+## [0.2.0] - 2021-05-28
+### Changed
+- **Breaking**: rename `Error` to `ParseError`. That describes its purpose more
+ closely and is particularly useful now that other error types exist in the library.
+
+### Removed
+- **Breaking**: remove `proc-macro` feature and instead offer the corresponding
+ `impl`s unconditionally. Since the feature didn't enable/disable a
+ dependency (`proc-macro` is a compiler provided crate) and since apparently
+ it works fine in `no_std` environments, I dropped this feature. I don't
+ currently see a reason why the corresponding impls should be conditional.
+
+### Added
+- `TryFrom<TokenTree> for litrs::Literal` impls
+- `From<*Lit> for litrs::Literal` impls
+- `TryFrom<proc_macro[2]::Literal> for *Lit`
+- `TryFrom<TokenTree> for *Lit`
+- `InvalidToken` error type for all new `TryFrom` impls
+
+
+## [0.1.1] - 2021-05-25
+### Added
+- `From` impls to create a `Literal` from references to proc-macro literal types:
+ - `From<&proc_macro::Literal>`
+ - `From<&proc_macro2::Literal>`
+- Better examples in README and repository
+
+## 0.1.0 - 2021-05-24
+### Added
+- Everything
+
+
+[Unreleased]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.3...HEAD
+[0.2.3]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.2...v0.2.3
+[0.2.2]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.1...v0.2.2
+[0.2.1]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.0...v0.2.1
+[0.2.0]: https://github.com/LukasKalbertodt/litrs/compare/v0.1.1...v0.2.0
+[0.1.1]: https://github.com/LukasKalbertodt/litrs/compare/v0.1.0...v0.1.1
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..e0e1115
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,31 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+edition = "2018"
+name = "litrs"
+version = "0.2.3"
+authors = ["Lukas Kalbertodt <lukas.kalbertodt@gmail.com>"]
+exclude = [".github"]
+description = "Parse and inspect Rust literals (i.e. tokens in the Rust programming language\nrepresenting fixed values). Particularly useful for proc macros, but can also\nbe used outside of a proc-macro context.\n"
+documentation = "https://docs.rs/litrs/"
+readme = "README.md"
+keywords = ["literal", "parsing", "proc-macro", "type", "procedural"]
+categories = ["development-tools::procedural-macro-helpers", "parser-implementations", "development-tools::build-utils"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/LukasKalbertodt/litrs/"
+[dependencies.proc-macro2]
+version = "1"
+optional = true
+
+[features]
+default = ["proc-macro2"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..9adec4c
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,30 @@
+[package]
+name = "litrs"
+version = "0.2.3"
+authors = ["Lukas Kalbertodt <lukas.kalbertodt@gmail.com>"]
+edition = "2018"
+
+description = """
+Parse and inspect Rust literals (i.e. tokens in the Rust programming language
+representing fixed values). Particularly useful for proc macros, but can also
+be used outside of a proc-macro context.
+"""
+documentation = "https://docs.rs/litrs/"
+repository = "https://github.com/LukasKalbertodt/litrs/"
+readme = "README.md"
+license = "MIT/Apache-2.0"
+
+keywords = ["literal", "parsing", "proc-macro", "type", "procedural"]
+categories = [
+ "development-tools::procedural-macro-helpers",
+ "parser-implementations",
+ "development-tools::build-utils",
+]
+exclude = [".github"]
+
+
+[features]
+default = ["proc-macro2"]
+
+[dependencies]
+proc-macro2 = { version = "1", optional = true }
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..6b579aa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-APACHE \ No newline at end of file
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..1b5ec8b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,176 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..4fa8658
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2020 Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..74905a4
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,20 @@
+name: "litrs"
+description: "Parse and inspect Rust literals (i.e. tokens in the Rust programming language representing fixed values). Particularly useful for proc macros, but can also be used outside of a proc-macro context."
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://crates.io/crates/litrs"
+ }
+ url {
+ type: ARCHIVE
+ value: "https://static.crates.io/crates/litrs/litrs-0.2.3.crate"
+ }
+ version: "0.2.3"
+ # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same.
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2022
+ month: 10
+ day: 27
+ }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..4e1e118
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,2 @@
+include platform/prebuilts/rust:master:/OWNERS
+fmayle@google.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..04c7b47
--- /dev/null
+++ b/README.md
@@ -0,0 +1,88 @@
+# `litrs`: parsing and inspecting Rust literals
+
+[<img alt="CI status of master" src="https://img.shields.io/github/workflow/status/LukasKalbertodt/litrs/CI/master?label=CI&logo=github&logoColor=white&style=for-the-badge" height="23">](https://github.com/LukasKalbertodt/litrs/actions?query=workflow%3ACI+branch%3Amaster)
+[<img alt="Crates.io Version" src="https://img.shields.io/crates/v/litrs?logo=rust&style=for-the-badge" height="23">](https://crates.io/crates/litrs)
+[<img alt="docs.rs" src="https://img.shields.io/crates/v/litrs?color=blue&label=docs&style=for-the-badge" height="23">](https://docs.rs/litrs)
+
+`litrs` offers functionality to parse Rust literals, i.e. tokens in the Rust programming language that represent fixed values.
+This is particularly useful for proc macros, but can also be used outside of a proc-macro context.
+
+**Why this library?**
+Unfortunately, the `proc_macro` API shipped with the compiler offers no easy way to inspect literals.
+There are mainly two libraries for this purpose:
+[`syn`](https://github.com/dtolnay/syn) and [`literalext`](https://github.com/mystor/literalext).
+The latter is deprecated.
+And `syn` is oftentimes overkill for the task at hand, especially when developing function-like proc-macros (e.g. `foo!(..)`).
+This crate is a lightweight alternative.
+Also, when it comes to literals, `litrs` offers a bit more flexibility and a few more features compared to `syn`.
+
+While this library is fairly young, it is extensively tested and I think the number of parsing bugs should already be very low.
+I'm interested in community feedback!
+If you consider using this, please speak your mind [in this issue](https://github.com/LukasKalbertodt/litrs/issues/1).
+
+## Example
+
+### In proc macro
+
+```rust
+use std::convert::TryFrom;
+use proc_macro::TokenStream;
+use litrs::Literal;
+
+#[proc_macro]
+pub fn foo(input: TokenStream) -> TokenStream {
+ // Please do proper error handling in your real code!
+ let first_token = input.into_iter().next().expect("no input");
+
+ // `try_from` will return an error if the token is not a literal.
+ match Literal::try_from(first_token) {
+ // Convenient methods to produce decent errors via `compile_error!`.
+ Err(e) => return e.to_compile_error(),
+
+ // You can now inspect your literal!
+ Ok(Literal::Integer(i)) => {
+ println!("Got an integer specified in base {:?}", i.base());
+
+ let value = i.value::<u64>().expect("integer literal too large");
+ println!("Is your integer even? {}", value % 2 == 0);
+ }
+ Ok(other) => {
+ println!("Got a non-integer literal");
+ }
+ }
+
+ TokenStream::new() // dummy output
+}
+```
+
+If you are expecting a specific kind of literal, you can also use this, which will return an error if the token is not a float literal.
+
+```rust
+FloatLit::try_from(first_token)
+```
+
+### Parsing from a `&str`
+
+Outside of a proc macro context you might want to parse a string directly.
+
+```rust
+use litrs::{FloatLit, Literal};
+
+let lit = Literal::parse("'🦀'").expect("failed to parse literal");
+let float_lit = FloatLit::parse("2.7e3").expect("failed to parse as float literal");
+```
+
+See [**the documentation**](https://docs.rs/litrs) or the `examples/` directory for more examples and information.
+
+
+<br />
+
+---
+
+## License
+
+Licensed under either of <a href="LICENSE-APACHE">Apache License, Version
+2.0</a> or <a href="LICENSE-MIT">MIT license</a> at your option.
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in this project by you, as defined in the Apache-2.0 license,
+shall be dual licensed as above, without any additional terms or conditions.
diff --git a/src/bool/mod.rs b/src/bool/mod.rs
new file mode 100644
index 0000000..406174c
--- /dev/null
+++ b/src/bool/mod.rs
@@ -0,0 +1,51 @@
+use std::fmt;
+
+use crate::{ParseError, err::{perr, ParseErrorKind::*}};
+
+
+/// A bool literal: `true` or `false`. Also see [the reference][ref].
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#boolean-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BoolLit {
+ False,
+ True,
+}
+
+impl BoolLit {
+ /// Parses the input as a bool literal. Returns an error if the input is
+ /// invalid or represents a different kind of literal.
+ pub fn parse(s: &str) -> Result<Self, ParseError> {
+ match s {
+ "false" => Ok(Self::False),
+ "true" => Ok(Self::True),
+ _ => Err(perr(None, InvalidLiteral)),
+ }
+ }
+
+ /// Returns the actual Boolean value of this literal.
+ pub fn value(self) -> bool {
+ match self {
+ Self::False => false,
+ Self::True => true,
+ }
+ }
+
+ /// Returns the literal as string.
+ pub fn as_str(&self) -> &'static str {
+ match self {
+ Self::False => "false",
+ Self::True => "true",
+ }
+ }
+}
+
+impl fmt::Display for BoolLit {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.pad(self.as_str())
+ }
+}
+
+
+#[cfg(test)]
+mod tests;
diff --git a/src/bool/tests.rs b/src/bool/tests.rs
new file mode 100644
index 0000000..4b82924
--- /dev/null
+++ b/src/bool/tests.rs
@@ -0,0 +1,48 @@
+use crate::{
+ Literal, BoolLit,
+ test_util::assert_parse_ok_eq,
+};
+
+macro_rules! assert_bool_parse {
+ ($input:literal, $expected:expr) => {
+ assert_parse_ok_eq(
+ $input, Literal::parse($input), Literal::Bool($expected), "Literal::parse");
+ assert_parse_ok_eq($input, BoolLit::parse($input), $expected, "BoolLit::parse");
+ };
+}
+
+
+
+#[test]
+fn parse_ok() {
+ assert_bool_parse!("false", BoolLit::False);
+ assert_bool_parse!("true", BoolLit::True);
+}
+
+#[test]
+fn parse_err() {
+ assert!(Literal::parse("fa").is_err());
+ assert!(Literal::parse("fal").is_err());
+ assert!(Literal::parse("fals").is_err());
+ assert!(Literal::parse(" false").is_err());
+ assert!(Literal::parse("false ").is_err());
+ assert!(Literal::parse("False").is_err());
+
+ assert!(Literal::parse("tr").is_err());
+ assert!(Literal::parse("tru").is_err());
+ assert!(Literal::parse(" true").is_err());
+ assert!(Literal::parse("true ").is_err());
+ assert!(Literal::parse("True").is_err());
+}
+
+#[test]
+fn value() {
+ assert!(!BoolLit::False.value());
+ assert!(BoolLit::True.value());
+}
+
+#[test]
+fn as_str() {
+ assert_eq!(BoolLit::False.as_str(), "false");
+ assert_eq!(BoolLit::True.as_str(), "true");
+}
diff --git a/src/byte/mod.rs b/src/byte/mod.rs
new file mode 100644
index 0000000..5f60e42
--- /dev/null
+++ b/src/byte/mod.rs
@@ -0,0 +1,91 @@
+use core::fmt;
+
+use crate::{
+ Buffer, ParseError,
+ err::{perr, ParseErrorKind::*},
+ escape::unescape,
+};
+
+
+/// A (single) byte literal, e.g. `b'k'` or `b'!'`.
+///
+/// See [the reference][ref] for more information.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ByteLit<B: Buffer> {
+ raw: B,
+ value: u8,
+}
+
+impl<B: Buffer> ByteLit<B> {
+ /// Parses the input as a byte literal. Returns an error if the input is
+ /// invalid or represents a different kind of literal.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ if input.is_empty() {
+ return Err(perr(None, Empty));
+ }
+ if !input.starts_with("b'") {
+ return Err(perr(None, InvalidByteLiteralStart));
+ }
+
+ Self::parse_impl(input)
+ }
+
+ /// Returns the byte value that this literal represents.
+ pub fn value(&self) -> u8 {
+ self.value
+ }
+
+ /// Precondition: must start with `b'`.
+ pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
+ if input.len() == 2 {
+ return Err(perr(None, UnterminatedByteLiteral));
+ }
+ if *input.as_bytes().last().unwrap() != b'\'' {
+ return Err(perr(None, UnterminatedByteLiteral));
+ }
+
+ let inner = &input[2..input.len() - 1];
+ let first = inner.as_bytes().get(0).ok_or(perr(None, EmptyByteLiteral))?;
+ let (c, len) = match first {
+ b'\'' => return Err(perr(2, UnescapedSingleQuote)),
+ b'\n' | b'\t' | b'\r'
+ => return Err(perr(2, UnescapedSpecialWhitespace)),
+
+ b'\\' => unescape::<u8>(inner, 2)?,
+ other if other.is_ascii() => (*other, 1),
+ _ => return Err(perr(2, NonAsciiInByteLiteral)),
+ };
+ let rest = &inner[len..];
+
+ if !rest.is_empty() {
+ return Err(perr(len + 2..input.len() - 1, OverlongByteLiteral));
+ }
+
+ Ok(Self {
+ raw: input,
+ value: c,
+ })
+ }
+}
+
+impl ByteLit<&str> {
+ /// Makes a copy of the underlying buffer and returns the owned version of
+ /// `Self`.
+ pub fn to_owned(&self) -> ByteLit<String> {
+ ByteLit {
+ raw: self.raw.to_owned(),
+ value: self.value,
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for ByteLit<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.pad(&self.raw)
+ }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/byte/tests.rs b/src/byte/tests.rs
new file mode 100644
index 0000000..5f663ce
--- /dev/null
+++ b/src/byte/tests.rs
@@ -0,0 +1,173 @@
+use crate::{ByteLit, Literal, test_util::assert_parse_ok_eq};
+
+// ===== Utility functions =======================================================================
+
+macro_rules! check {
+ ($lit:literal) => {
+ let input = stringify!($lit);
+ let expected = ByteLit {
+ raw: input,
+ value: $lit,
+ };
+
+ assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse");
+ assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse");
+ assert_eq!(ByteLit::parse(input).unwrap().value(), $lit);
+ };
+}
+
+
+// ===== Actual tests ============================================================================
+
+#[test]
+fn alphanumeric() {
+ check!(b'a');
+ check!(b'b');
+ check!(b'y');
+ check!(b'z');
+ check!(b'A');
+ check!(b'B');
+ check!(b'Y');
+ check!(b'Z');
+
+ check!(b'0');
+ check!(b'1');
+ check!(b'8');
+ check!(b'9');
+}
+
+#[test]
+fn special_chars() {
+ check!(b' ');
+ check!(b'!');
+ check!(b'"');
+ check!(b'#');
+ check!(b'$');
+ check!(b'%');
+ check!(b'&');
+ check!(b'(');
+ check!(b')');
+ check!(b'*');
+ check!(b'+');
+ check!(b',');
+ check!(b'-');
+ check!(b'.');
+ check!(b'/');
+ check!(b':');
+ check!(b';');
+ check!(b'<');
+ check!(b'=');
+ check!(b'>');
+ check!(b'?');
+ check!(b'@');
+ check!(b'[');
+ check!(b']');
+ check!(b'^');
+ check!(b'_');
+ check!(b'`');
+ check!(b'{');
+ check!(b'|');
+ check!(b'}');
+ check!(b'~');
+}
+
+#[test]
+fn quote_escapes() {
+ check!(b'\'');
+ check!(b'\"');
+}
+
+#[test]
+fn ascii_escapes() {
+ check!(b'\n');
+ check!(b'\r');
+ check!(b'\t');
+ check!(b'\\');
+ check!(b'\0');
+
+ check!(b'\x00');
+ check!(b'\x01');
+ check!(b'\x0c');
+ check!(b'\x0D');
+ check!(b'\x13');
+ check!(b'\x30');
+ check!(b'\x30');
+ check!(b'\x4B');
+ check!(b'\x6b');
+ check!(b'\x7F');
+ check!(b'\x7f');
+}
+
+#[test]
+fn byte_escapes() {
+ check!(b'\x80');
+ check!(b'\x8a');
+ check!(b'\x8C');
+ check!(b'\x99');
+ check!(b'\xa0');
+ check!(b'\xAd');
+ check!(b'\xfe');
+ check!(b'\xFe');
+ check!(b'\xfF');
+ check!(b'\xFF');
+}
+
+#[test]
+fn invald_escapes() {
+ assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4);
+ assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4);
+ assert_err!(ByteLit, r"b'\", UnterminatedByteLiteral, None);
+ assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..4);
+ assert_err!(ByteLit, r"b'\x1'", UnterminatedEscape, 2..5);
+ assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6);
+ assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6);
+}
+
+#[test]
+fn unicode_escape_not_allowed() {
+ assert_err!(ByteLit, r"b'\u{0}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{00}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{b}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{B}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{7e}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{E4}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{e4}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{fc}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{Fc}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{fC}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{FC}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{b10}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{B10}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{0b10}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{2764}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{1f602}'", UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteLit, r"b'\u{1F602}'", UnicodeEscapeInByteLiteral, 2..4);
+}
+
+#[test]
+fn parse_err() {
+ assert_err!(ByteLit, r"b''", EmptyByteLiteral, None);
+ assert_err!(ByteLit, r"b' ''", OverlongByteLiteral, 3..4);
+
+ assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None);
+ assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None);
+ assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None);
+ assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None);
+
+ assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, 3..4);
+ assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, 3..5);
+ assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, 4..5);
+
+ assert_err!(ByteLit, r"", Empty, None);
+
+ assert_err!(ByteLit, r"b'''", UnescapedSingleQuote, 2);
+ assert_err!(ByteLit, r"b''''", UnescapedSingleQuote, 2);
+
+ assert_err!(ByteLit, "b'\n'", UnescapedSpecialWhitespace, 2);
+ assert_err!(ByteLit, "b'\t'", UnescapedSpecialWhitespace, 2);
+ assert_err!(ByteLit, "b'\r'", UnescapedSpecialWhitespace, 2);
+
+ assert_err!(ByteLit, "b'న'", NonAsciiInByteLiteral, 2);
+ assert_err!(ByteLit, "b'犬'", NonAsciiInByteLiteral, 2);
+ assert_err!(ByteLit, "b'🦊'", NonAsciiInByteLiteral, 2);
+}
diff --git a/src/bytestr/mod.rs b/src/bytestr/mod.rs
new file mode 100644
index 0000000..6cfb61d
--- /dev/null
+++ b/src/bytestr/mod.rs
@@ -0,0 +1,113 @@
+use std::{fmt, ops::Range};
+
+use crate::{
+ Buffer, ParseError,
+ err::{perr, ParseErrorKind::*},
+ escape::{scan_raw_string, unescape_string},
+};
+
+
+/// A byte string or raw byte string literal, e.g. `b"hello"` or `br#"abc"def"#`.
+///
+/// See [the reference][ref] for more information.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-string-literals
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ByteStringLit<B: Buffer> {
+ /// The raw input.
+ raw: B,
+
+ /// The string value (with all escapes unescaped), or `None` if there were
+ /// no escapes. In the latter case, the inner part of `raw` is the string value.
+ value: Option<Vec<u8>>,
+
+ /// The number of hash signs in case of a raw string literal, or `None` if
+ /// it's not a raw string literal.
+ num_hashes: Option<u32>,
+}
+
+impl<B: Buffer> ByteStringLit<B> {
+ /// Parses the input as a (raw) byte string literal. Returns an error if the
+ /// input is invalid or represents a different kind of literal.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ if input.is_empty() {
+ return Err(perr(None, Empty));
+ }
+ if !input.starts_with(r#"b""#) && !input.starts_with("br") {
+ return Err(perr(None, InvalidByteStringLiteralStart));
+ }
+
+ Self::parse_impl(input)
+ }
+
+ /// Returns the string value this literal represents (where all escapes have
+ /// been turned into their respective values).
+ pub fn value(&self) -> &[u8] {
+ self.value.as_deref().unwrap_or(&self.raw.as_bytes()[self.inner_range()])
+ }
+
+ /// Like `value` but returns a potentially owned version of the value.
+ ///
+ /// The return value is either `Cow<'static, [u8]>` if `B = String`, or
+ /// `Cow<'a, [u8]>` if `B = &'a str`.
+ pub fn into_value(self) -> B::ByteCow {
+ let inner_range = self.inner_range();
+ let Self { raw, value, .. } = self;
+ value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow())
+ }
+
+ /// Returns whether this literal is a raw byte string literal (starting with
+ /// `br`).
+ pub fn is_raw_byte_string(&self) -> bool {
+ self.num_hashes.is_some()
+ }
+
+ /// The range within `self.raw` that excludes the `b`/`br` prefix, the quotes and any `#` signs.
+ fn inner_range(&self) -> Range<usize> {
+ match self.num_hashes {
+ None => 2..self.raw.len() - 1,
+ Some(n) => 2 + n as usize + 1..self.raw.len() - n as usize - 1,
+ }
+ }
+
+ /// Precondition: input has to start with either `b"` or `br`.
+ pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
+ if input.starts_with(r"br") {
+ let (value, num_hashes) = scan_raw_string::<u8>(&input, 2)?;
+ Ok(Self {
+ raw: input,
+ value: value.map(|s| s.into_bytes()),
+ num_hashes: Some(num_hashes),
+ })
+ } else {
+ let value = unescape_string::<u8>(&input, 2)?.map(|s| s.into_bytes());
+ Ok(Self {
+ raw: input,
+ value,
+ num_hashes: None,
+ })
+ }
+ }
+}
+
+impl ByteStringLit<&str> {
+ /// Makes a copy of the underlying buffer and returns the owned version of
+ /// `Self`.
+ pub fn into_owned(self) -> ByteStringLit<String> {
+ ByteStringLit {
+ raw: self.raw.to_owned(),
+ value: self.value,
+ num_hashes: self.num_hashes,
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for ByteStringLit<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.pad(&self.raw)
+ }
+}
+
+
+#[cfg(test)]
+mod tests;
diff --git a/src/bytestr/tests.rs b/src/bytestr/tests.rs
new file mode 100644
index 0000000..8e3c033
--- /dev/null
+++ b/src/bytestr/tests.rs
@@ -0,0 +1,210 @@
+use crate::{Literal, ByteStringLit, test_util::assert_parse_ok_eq};
+
+// ===== Utility functions =======================================================================
+
+macro_rules! check {
+ ($lit:literal, $has_escapes:expr, $num_hashes:expr) => {
+ let input = stringify!($lit);
+ let expected = ByteStringLit {
+ raw: input,
+ value: if $has_escapes { Some($lit.to_vec()) } else { None },
+ num_hashes: $num_hashes,
+ };
+
+ assert_parse_ok_eq(
+ input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse");
+ assert_parse_ok_eq(
+ input, Literal::parse(input), Literal::ByteString(expected), "Literal::parse");
+ assert_eq!(ByteStringLit::parse(input).unwrap().value(), $lit);
+ assert_eq!(ByteStringLit::parse(input).unwrap().into_value().as_ref(), $lit);
+ };
+}
+
+
+// ===== Actual tests ============================================================================
+
+#[test]
+fn simple() {
+ check!(b"", false, None);
+ check!(b"a", false, None);
+ check!(b"peter", false, None);
+}
+
+#[test]
+fn special_whitespace() {
+ let strings = ["\n", "\t", "foo\tbar", "baz\n"];
+
+ for &s in &strings {
+ let input = format!(r#"b"{}""#, s);
+ let input_raw = format!(r#"br"{}""#, s);
+ for (input, num_hashes) in vec![(input, None), (input_raw, Some(0))] {
+ let expected = ByteStringLit {
+ raw: &*input,
+ value: None,
+ num_hashes,
+ };
+ assert_parse_ok_eq(
+ &input, ByteStringLit::parse(&*input), expected.clone(), "ByteStringLit::parse");
+ assert_parse_ok_eq(
+ &input, Literal::parse(&*input), Literal::ByteString(expected), "Literal::parse");
+ assert_eq!(ByteStringLit::parse(&*input).unwrap().value(), s.as_bytes());
+ assert_eq!(ByteStringLit::parse(&*input).unwrap().into_value(), s.as_bytes());
+ }
+ }
+
+ let res = ByteStringLit::parse("br\"\r\"").expect("failed to parse");
+ assert_eq!(res.value(), b"\r");
+}
+
+#[test]
+fn simple_escapes() {
+ check!(b"a\nb", true, None);
+ check!(b"\nb", true, None);
+ check!(b"a\n", true, None);
+ check!(b"\n", true, None);
+
+ check!(b"\x60foo \t bar\rbaz\n banana \0kiwi", true, None);
+ check!(b"foo \\ferris", true, None);
+ check!(b"baz \\ferris\"box", true, None);
+ check!(b"\\foo\\ banana\" baz\"", true, None);
+ check!(b"\"foo \\ferris \" baz\\", true, None);
+
+ check!(b"\x00", true, None);
+ check!(b" \x01", true, None);
+ check!(b"\x0c foo", true, None);
+ check!(b" foo\x0D ", true, None);
+ check!(b"\\x13", true, None);
+ check!(b"\"x30", true, None);
+}
+
+#[test]
+fn string_continue() {
+ check!(b"foo\
+ bar", true, None);
+ check!(b"foo\
+bar", true, None);
+
+ check!(b"foo\
+
+ banana", true, None);
+
+ // Weird whitespace characters
+ let lit = ByteStringLit::parse("b\"foo\\\n\r\t\n \n\tbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), b"foobar");
+
+ // Raw strings do not handle "string continues"
+ check!(br"foo\
+ bar", false, Some(0));
+}
+
+#[test]
+fn crlf_newlines() {
+ let lit = ByteStringLit::parse("b\"foo\r\nbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), b"foo\nbar");
+
+ let lit = ByteStringLit::parse("b\"\r\nbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), b"\nbar");
+
+ let lit = ByteStringLit::parse("b\"foo\r\n\"").expect("failed to parse");
+ assert_eq!(lit.value(), b"foo\n");
+
+ let lit = ByteStringLit::parse("br\"foo\r\nbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), b"foo\nbar");
+
+ let lit = ByteStringLit::parse("br#\"\r\nbar\"#").expect("failed to parse");
+ assert_eq!(lit.value(), b"\nbar");
+
+ let lit = ByteStringLit::parse("br##\"foo\r\n\"##").expect("failed to parse");
+ assert_eq!(lit.value(), b"foo\n");
+}
+
+#[test]
+fn raw_byte_string() {
+ check!(br"", false, Some(0));
+ check!(br"a", false, Some(0));
+ check!(br"peter", false, Some(0));
+ check!(br"Greetings jason!", false, Some(0));
+
+ check!(br#""#, false, Some(1));
+ check!(br#"a"#, false, Some(1));
+ check!(br##"peter"##, false, Some(2));
+ check!(br###"Greetings # Jason!"###, false, Some(3));
+ check!(br########"we ## need #### more ####### hashtags"########, false, Some(8));
+
+ check!(br#"foo " bar"#, false, Some(1));
+ check!(br##"foo " bar"##, false, Some(2));
+ check!(br#"foo """" '"'" bar"#, false, Some(1));
+ check!(br#""foo""#, false, Some(1));
+ check!(br###""foo'"###, false, Some(3));
+ check!(br#""x'#_#s'"#, false, Some(1));
+ check!(br"#", false, Some(0));
+ check!(br"foo#", false, Some(0));
+ check!(br"##bar", false, Some(0));
+ check!(br###""##foo"##bar'"###, false, Some(3));
+
+ check!(br"foo\n\t\r\0\\x60\u{123}doggo", false, Some(0));
+ check!(br#"cat\n\t\r\0\\x60\u{123}doggo"#, false, Some(1));
+}
+
+#[test]
+fn parse_err() {
+ assert_err!(ByteStringLit, r#"b""#, UnterminatedString, None);
+ assert_err!(ByteStringLit, r#"b"cat"#, UnterminatedString, None);
+ assert_err!(ByteStringLit, r#"b"Jurgen"#, UnterminatedString, None);
+ assert_err!(ByteStringLit, r#"b"foo bar baz"#, UnterminatedString, None);
+
+ assert_err!(ByteStringLit, r#"b"fox"peter"#, UnexpectedChar, 6..11);
+ assert_err!(ByteStringLit, r#"b"fox"peter""#, UnexpectedChar, 6..12);
+ assert_err!(ByteStringLit, r#"b"fox"bar"#, UnexpectedChar, 6..9);
+ assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10..16);
+
+ assert_err!(ByteStringLit, "b\"\r\"", IsolatedCr, 2);
+ assert_err!(ByteStringLit, "b\"fo\rx\"", IsolatedCr, 4);
+
+ assert_err!(ByteStringLit, r##"br####""##, UnterminatedRawString, None);
+ assert_err!(ByteStringLit, r#####"br##"foo"#bar"#####, UnterminatedRawString, None);
+ assert_err!(ByteStringLit, r##"br####"##, InvalidLiteral, None);
+ assert_err!(ByteStringLit, r##"br####x"##, InvalidLiteral, None);
+}
+
+#[test]
+fn non_ascii() {
+ assert_err!(ByteStringLit, r#"b"న""#, NonAsciiInByteLiteral, 2);
+ assert_err!(ByteStringLit, r#"b"foo犬""#, NonAsciiInByteLiteral, 5);
+ assert_err!(ByteStringLit, r#"b"x🦊baz""#, NonAsciiInByteLiteral, 3);
+ assert_err!(ByteStringLit, r#"br"న""#, NonAsciiInByteLiteral, 3);
+ assert_err!(ByteStringLit, r#"br"foo犬""#, NonAsciiInByteLiteral, 6);
+ assert_err!(ByteStringLit, r#"br"x🦊baz""#, NonAsciiInByteLiteral, 4);
+}
+
+#[test]
+fn invald_escapes() {
+ assert_err!(ByteStringLit, r#"b"\a""#, UnknownEscape, 2..4);
+ assert_err!(ByteStringLit, r#"b"foo\y""#, UnknownEscape, 5..7);
+ assert_err!(ByteStringLit, r#"b"\"#, UnterminatedString, None);
+ assert_err!(ByteStringLit, r#"b"\x""#, UnterminatedEscape, 2..4);
+ assert_err!(ByteStringLit, r#"b"foo\x1""#, UnterminatedEscape, 5..8);
+ assert_err!(ByteStringLit, r#"b" \xaj""#, InvalidXEscape, 3..7);
+ assert_err!(ByteStringLit, r#"b"\xjbbaz""#, InvalidXEscape, 2..6);
+}
+
+#[test]
+fn unicode_escape_not_allowed() {
+ assert_err!(ByteStringLit, r#"b"\u{0}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{00}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{b}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{B}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{7e}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{E4}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{e4}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{fc}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{Fc}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{fC}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{FC}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{b10}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{B10}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{0b10}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{2764}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{1f602}""#, UnicodeEscapeInByteLiteral, 2..4);
+ assert_err!(ByteStringLit, r#"b"\u{1F602}""#, UnicodeEscapeInByteLiteral, 2..4);
+}
diff --git a/src/char/mod.rs b/src/char/mod.rs
new file mode 100644
index 0000000..1480bdf
--- /dev/null
+++ b/src/char/mod.rs
@@ -0,0 +1,88 @@
+use std::fmt;
+
+use crate::{
+ Buffer, ParseError,
+ err::{perr, ParseErrorKind::*},
+ escape::unescape,
+ parse::first_byte_or_empty,
+};
+
+
+/// A character literal, e.g. `'g'` or `'🦊'`.
+///
+/// See [the reference][ref] for more information.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#character-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct CharLit<B: Buffer> {
+ raw: B,
+ value: char,
+}
+
+impl<B: Buffer> CharLit<B> {
+ /// Parses the input as a character literal. Returns an error if the input
+ /// is invalid or represents a different kind of literal.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ match first_byte_or_empty(&input)? {
+ b'\'' => Self::parse_impl(input),
+ _ => Err(perr(0, DoesNotStartWithQuote)),
+ }
+ }
+
+ /// Returns the character value that this literal represents.
+ pub fn value(&self) -> char {
+ self.value
+ }
+
+ /// Precondition: first character in input must be `'`.
+ pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
+ if input.len() == 1 {
+ return Err(perr(None, UnterminatedCharLiteral));
+ }
+ if *input.as_bytes().last().unwrap() != b'\'' {
+ return Err(perr(None, UnterminatedCharLiteral));
+ }
+
+ let inner = &input[1..input.len() - 1];
+ let first = inner.chars().nth(0).ok_or(perr(None, EmptyCharLiteral))?;
+ let (c, len) = match first {
+ '\'' => return Err(perr(1, UnescapedSingleQuote)),
+ '\n' | '\t' | '\r'
+ => return Err(perr(1, UnescapedSpecialWhitespace)),
+
+ '\\' => unescape::<char>(inner, 1)?,
+ other => (other, other.len_utf8()),
+ };
+ let rest = &inner[len..];
+
+ if !rest.is_empty() {
+ return Err(perr(len + 1..input.len() - 1, OverlongCharLiteral));
+ }
+
+ Ok(Self {
+ raw: input,
+ value: c,
+ })
+ }
+}
+
+impl CharLit<&str> {
+ /// Makes a copy of the underlying buffer and returns the owned version of
+ /// `Self`.
+ pub fn to_owned(&self) -> CharLit<String> {
+ CharLit {
+ raw: self.raw.to_owned(),
+ value: self.value,
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for CharLit<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.pad(&self.raw)
+ }
+}
+
+
+#[cfg(test)]
+mod tests;
diff --git a/src/char/tests.rs b/src/char/tests.rs
new file mode 100644
index 0000000..01ca2c5
--- /dev/null
+++ b/src/char/tests.rs
@@ -0,0 +1,213 @@
+use crate::{Literal, test_util::assert_parse_ok_eq};
+use super::CharLit;
+
+// ===== Utility functions =======================================================================
+
+macro_rules! check {
+ ($lit:literal) => {
+ let input = stringify!($lit);
+ let expected = CharLit {
+ raw: input,
+ value: $lit,
+ };
+
+ assert_parse_ok_eq(input, CharLit::parse(input), expected.clone(), "CharLit::parse");
+ assert_parse_ok_eq(input, Literal::parse(input), Literal::Char(expected), "Literal::parse");
+ assert_eq!(CharLit::parse(input).unwrap().value(), $lit);
+ };
+}
+
+
+// ===== Actual tests ============================================================================
+
+#[test]
+fn alphanumeric() {
+ check!('a');
+ check!('b');
+ check!('y');
+ check!('z');
+ check!('A');
+ check!('B');
+ check!('Y');
+ check!('Z');
+
+ check!('0');
+ check!('1');
+ check!('8');
+ check!('9');
+}
+
+#[test]
+fn special_chars() {
+ check!(' ');
+ check!('!');
+ check!('"');
+ check!('#');
+ check!('$');
+ check!('%');
+ check!('&');
+ check!('(');
+ check!(')');
+ check!('*');
+ check!('+');
+ check!(',');
+ check!('-');
+ check!('.');
+ check!('/');
+ check!(':');
+ check!(';');
+ check!('<');
+ check!('=');
+ check!('>');
+ check!('?');
+ check!('@');
+ check!('[');
+ check!(']');
+ check!('^');
+ check!('_');
+ check!('`');
+ check!('{');
+ check!('|');
+ check!('}');
+ check!('~');
+}
+
+#[test]
+fn unicode() {
+ check!('న');
+ check!('犬');
+ check!('🦊');
+}
+
+#[test]
+fn quote_escapes() {
+ check!('\'');
+ check!('\"');
+}
+
+#[test]
+fn ascii_escapes() {
+ check!('\n');
+ check!('\r');
+ check!('\t');
+ check!('\\');
+ check!('\0');
+
+ check!('\x00');
+ check!('\x01');
+ check!('\x0c');
+ check!('\x0D');
+ check!('\x13');
+ check!('\x30');
+ check!('\x30');
+ check!('\x4B');
+ check!('\x6b');
+ check!('\x7F');
+ check!('\x7f');
+}
+
+#[test]
+fn unicode_escapes() {
+ check!('\u{0}');
+ check!('\u{00}');
+ check!('\u{b}');
+ check!('\u{B}');
+ check!('\u{7e}');
+ check!('\u{E4}');
+ check!('\u{e4}');
+ check!('\u{fc}');
+ check!('\u{Fc}');
+ check!('\u{fC}');
+ check!('\u{FC}');
+ check!('\u{b10}');
+ check!('\u{B10}');
+ check!('\u{0b10}');
+ check!('\u{2764}');
+ check!('\u{1f602}');
+ check!('\u{1F602}');
+
+ check!('\u{0}');
+ check!('\u{0__}');
+ check!('\u{3_b}');
+ check!('\u{1_F_6_0_2}');
+ check!('\u{1_F6_02_____}');
+}
+
+#[test]
+fn invald_ascii_escapes() {
+ assert_err!(CharLit, r"'\x80'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\x81'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\x8a'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\x8F'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xa0'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xB0'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xc3'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xDf'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xff'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xfF'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xFf'", NonAsciiXEscape, 1..5);
+ assert_err!(CharLit, r"'\xFF'", NonAsciiXEscape, 1..5);
+}
+
+#[test]
+fn invald_escapes() {
+ assert_err!(CharLit, r"'\a'", UnknownEscape, 1..3);
+ assert_err!(CharLit, r"'\y'", UnknownEscape, 1..3);
+ assert_err!(CharLit, r"'\", UnterminatedCharLiteral, None);
+ assert_err!(CharLit, r"'\x'", UnterminatedEscape, 1..3);
+ assert_err!(CharLit, r"'\x1'", UnterminatedEscape, 1..4);
+ assert_err!(CharLit, r"'\xaj'", InvalidXEscape, 1..5);
+ assert_err!(CharLit, r"'\xjb'", InvalidXEscape, 1..5);
+}
+
+#[test]
+fn invalid_unicode_escapes() {
+ assert_err!(CharLit, r"'\u'", UnicodeEscapeWithoutBrace, 1..3);
+ assert_err!(CharLit, r"'\u '", UnicodeEscapeWithoutBrace, 1..3);
+ assert_err!(CharLit, r"'\u3'", UnicodeEscapeWithoutBrace, 1..3);
+
+ assert_err!(CharLit, r"'\u{'", UnterminatedUnicodeEscape, 1..4);
+ assert_err!(CharLit, r"'\u{12'", UnterminatedUnicodeEscape, 1..6);
+ assert_err!(CharLit, r"'\u{a0b'", UnterminatedUnicodeEscape, 1..7);
+ assert_err!(CharLit, r"'\u{a0_b '", UnterminatedUnicodeEscape, 1..10);
+
+ assert_err!(CharLit, r"'\u{_}'", InvalidStartOfUnicodeEscape, 4);
+ assert_err!(CharLit, r"'\u{_5f}'", InvalidStartOfUnicodeEscape, 4);
+
+ assert_err!(CharLit, r"'\u{x}'", NonHexDigitInUnicodeEscape, 4);
+ assert_err!(CharLit, r"'\u{0x}'", NonHexDigitInUnicodeEscape, 5);
+ assert_err!(CharLit, r"'\u{3bx}'", NonHexDigitInUnicodeEscape, 6);
+ assert_err!(CharLit, r"'\u{3b_x}'", NonHexDigitInUnicodeEscape, 7);
+ assert_err!(CharLit, r"'\u{4x_}'", NonHexDigitInUnicodeEscape, 5);
+
+ assert_err!(CharLit, r"'\u{1234567}'", TooManyDigitInUnicodeEscape, 10);
+ assert_err!(CharLit, r"'\u{1234567}'", TooManyDigitInUnicodeEscape, 10);
+ assert_err!(CharLit, r"'\u{1_23_4_56_7}'", TooManyDigitInUnicodeEscape, 14);
+ assert_err!(CharLit, r"'\u{abcdef123}'", TooManyDigitInUnicodeEscape, 10);
+
+ assert_err!(CharLit, r"'\u{110000}'", InvalidUnicodeEscapeChar, 1..10);
+}
+
+#[test]
+fn parse_err() {
+ assert_err!(CharLit, r"''", EmptyCharLiteral, None);
+ assert_err!(CharLit, r"' ''", OverlongCharLiteral, 2..3);
+
+ assert_err!(CharLit, r"'", UnterminatedCharLiteral, None);
+ assert_err!(CharLit, r"'a", UnterminatedCharLiteral, None);
+ assert_err!(CharLit, r"'\n", UnterminatedCharLiteral, None);
+ assert_err!(CharLit, r"'\x35", UnterminatedCharLiteral, None);
+
+ assert_err!(CharLit, r"'ab'", OverlongCharLiteral, 2..3);
+ assert_err!(CharLit, r"'a _'", OverlongCharLiteral, 2..4);
+ assert_err!(CharLit, r"'\n3'", OverlongCharLiteral, 3..4);
+
+ assert_err!(CharLit, r"", Empty, None);
+
+ assert_err!(CharLit, r"'''", UnescapedSingleQuote, 1);
+ assert_err!(CharLit, r"''''", UnescapedSingleQuote, 1);
+
+ assert_err!(CharLit, "'\n'", UnescapedSpecialWhitespace, 1);
+ assert_err!(CharLit, "'\t'", UnescapedSpecialWhitespace, 1);
+ assert_err!(CharLit, "'\r'", UnescapedSpecialWhitespace, 1);
+}
diff --git a/src/err.rs b/src/err.rs
new file mode 100644
index 0000000..dd5b472
--- /dev/null
+++ b/src/err.rs
@@ -0,0 +1,363 @@
+use std::{fmt, ops::Range};
+
+
+/// An error signaling that a different kind of token was expected. Returned by
+/// the various `TryFrom` impls.
+#[derive(Debug, Clone, Copy)]
+pub struct InvalidToken {
+ pub(crate) expected: TokenKind,
+ pub(crate) actual: TokenKind,
+ pub(crate) span: Span,
+}
+
+impl InvalidToken {
+ /// Returns a token stream representing `compile_error!("msg");` where
+ /// `"msg"` is the output of `self.to_string()`. **Panics if called outside
+ /// of a proc-macro context!**
+ pub fn to_compile_error(&self) -> proc_macro::TokenStream {
+ use proc_macro::{Delimiter, Ident, Group, Punct, Spacing, TokenTree};
+
+ let span = match self.span {
+ Span::One(s) => s,
+ #[cfg(feature = "proc-macro2")]
+ Span::Two(s) => s.unwrap(),
+ };
+ let msg = self.to_string();
+ let tokens = vec![
+ TokenTree::from(Ident::new("compile_error", span)),
+ TokenTree::from(Punct::new('!', Spacing::Alone)),
+ TokenTree::from(Group::new(
+ Delimiter::Parenthesis,
+ TokenTree::from(proc_macro::Literal::string(&msg)).into(),
+ )),
+ ];
+
+
+ tokens.into_iter().map(|mut t| { t.set_span(span); t }).collect()
+ }
+
+ /// Like [`to_compile_error`][Self::to_compile_error], but returns a token
+ /// stream from `proc_macro2` and does not panic outside of a proc-macro
+ /// context.
+ #[cfg(feature = "proc-macro2")]
+ pub fn to_compile_error2(&self) -> proc_macro2::TokenStream {
+ use proc_macro2::{Delimiter, Ident, Group, Punct, Spacing, TokenTree};
+
+ let span = match self.span {
+ Span::One(s) => proc_macro2::Span::from(s),
+ Span::Two(s) => s,
+ };
+ let msg = self.to_string();
+ let tokens = vec![
+ TokenTree::from(Ident::new("compile_error", span)),
+ TokenTree::from(Punct::new('!', Spacing::Alone)),
+ TokenTree::from(Group::new(
+ Delimiter::Parenthesis,
+ TokenTree::from(proc_macro2::Literal::string(&msg)).into(),
+ )),
+ ];
+
+
+ tokens.into_iter().map(|mut t| { t.set_span(span); t }).collect()
+ }
+}
+
+impl std::error::Error for InvalidToken {}
+
+/// Renders the mismatch as `expected <kind>, but found <kind>`.
+impl fmt::Display for InvalidToken {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        /// Human readable description of a token kind, including its article
+        /// and, for literal kinds, a short example wrapped in backticks.
+        fn kind_desc(kind: TokenKind) -> &'static str {
+            match kind {
+                TokenKind::Punct => "a punctuation character",
+                TokenKind::Ident => "an identifier",
+                TokenKind::Group => "a group",
+                TokenKind::Literal => "a literal",
+                TokenKind::BoolLit => "a bool literal (`true` or `false`)",
+                // The example needs its closing backtick, otherwise the code
+                // span in the message is left open.
+                TokenKind::ByteLit => "a byte literal (e.g. `b'r'`)",
+                TokenKind::ByteStringLit => r#"a byte string literal (e.g. `b"fox"`)"#,
+                TokenKind::CharLit => "a character literal (e.g. `'P'`)",
+                TokenKind::FloatLit => "a float literal (e.g. `3.14`)",
+                TokenKind::IntegerLit => "an integer literal (e.g. `27`)",
+                // Backticks added so this example is formatted like all others.
+                TokenKind::StringLit => r#"a string literal (e.g. `"Ferris"`)"#,
+            }
+        }
+
+        write!(f, "expected {}, but found {}", kind_desc(self.expected), kind_desc(self.actual))
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum TokenKind {
+ Punct,
+ Ident,
+ Group,
+ Literal,
+ BoolLit,
+ ByteLit,
+ ByteStringLit,
+ CharLit,
+ FloatLit,
+ IntegerLit,
+ StringLit,
+}
+
+/// Unfortunately, we have to deal with both cases.
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum Span {
+ One(proc_macro::Span),
+ #[cfg(feature = "proc-macro2")]
+ Two(proc_macro2::Span),
+}
+
+impl From<proc_macro::Span> for Span {
+ fn from(src: proc_macro::Span) -> Self {
+ Self::One(src)
+ }
+}
+
+#[cfg(feature = "proc-macro2")]
+impl From<proc_macro2::Span> for Span {
+ fn from(src: proc_macro2::Span) -> Self {
+ Self::Two(src)
+ }
+}
+
+/// Errors during parsing.
+///
+/// This type should be seen primarily for error reporting and not for catching
+/// specific cases. The span and error kind are not guaranteed to be stable
+/// over different versions of this library, meaning that a returned error can
+/// change from one version to the next. There are simply too many fringe cases
+/// that are not easy to classify as a specific error kind. It depends entirely
+/// on the specific parser code how an invalid input is categorized.
+///
+/// Consider these examples:
+/// - `'\` can be seen as
+/// - invalid escape in character literal, or
+/// - unterminated character literal.
+/// - `'''` can be seen as
+/// - empty character literal, or
+/// - unescaped quote character in character literal.
+/// - `0b64` can be seen as
+/// - binary integer literal with invalid digit 6, or
+/// - binary integer literal with invalid digit 4, or
+/// - decimal integer literal with invalid digit b, or
+/// - decimal integer literal 0 with unknown type suffix `b64`.
+///
+/// If you want to see more of these examples, feel free to check out the unit
+/// tests of this library.
+///
+/// While this library does its best to emit sensible and precise errors, and to
+/// keep the returned errors as stable as possible, full stability cannot be
+/// guaranteed.
+#[derive(Debug, Clone)]
+pub struct ParseError {
+ pub(crate) span: Option<Range<usize>>,
+ pub(crate) kind: ParseErrorKind,
+}
+
+impl ParseError {
+ /// Returns a span of this error, if available. **Note**: the returned span
+ /// might change in future versions of this library. See [the documentation
+ /// of this type][ParseError] for more information.
+ pub fn span(&self) -> Option<Range<usize>> {
+ self.span.clone()
+ }
+}
+
+/// This is a free standing function instead of an associated one to reduce
+/// noise around parsing code. There are lots of places that create errors, and
+/// we want to keep them as short as possible.
+pub(crate) fn perr(span: impl SpanLike, kind: ParseErrorKind) -> ParseError {
+ ParseError {
+ span: span.into_span(),
+ kind,
+ }
+}
+
+pub(crate) trait SpanLike {
+ fn into_span(self) -> Option<Range<usize>>;
+}
+
+impl SpanLike for Option<Range<usize>> {
+ fn into_span(self) -> Option<Range<usize>> {
+ self
+ }
+}
+impl SpanLike for Range<usize> {
+ fn into_span(self) -> Option<Range<usize>> {
+ Some(self)
+ }
+}
+impl SpanLike for usize {
+ fn into_span(self) -> Option<Range<usize>> {
+ Some(self..self + 1)
+ }
+}
+
+
+/// Kinds of errors.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[non_exhaustive]
+pub(crate) enum ParseErrorKind {
+ /// The input was an empty string
+ Empty,
+
+ /// An unexpected char was encountered.
+ UnexpectedChar,
+
+ /// Literal was not recognized.
+ InvalidLiteral,
+
+ /// Input does not start with decimal digit when trying to parse an integer.
+ DoesNotStartWithDigit,
+
+ /// A digit invalid for the specified integer base was found.
+ InvalidDigit,
+
+ /// Integer literal does not contain any valid digits.
+ NoDigits,
+
+ /// Found an integer type suffix that is invalid.
+ InvalidIntegerTypeSuffix,
+
+ /// Found a float type suffix that is invalid. Only `f32` and `f64` are
+ /// valid.
+ InvalidFloatTypeSuffix,
+
+ /// Exponent of a float literal does not contain any digits.
+ NoExponentDigits,
+
+ /// An unknown escape code, e.g. `\b`.
+ UnknownEscape,
+
+ /// A started escape sequence where the input ended before the escape was
+ /// finished.
+ UnterminatedEscape,
+
+ /// An `\x` escape where the two digits are not valid hex digits.
+ InvalidXEscape,
+
+ /// A string or character literal using the `\xNN` escape where `NN > 0x7F`.
+ NonAsciiXEscape,
+
+ /// A `\u{...}` escape in a byte or byte string literal.
+ UnicodeEscapeInByteLiteral,
+
+ /// A Unicode escape that does not start with a hex digit.
+ InvalidStartOfUnicodeEscape,
+
+ /// A `\u{...}` escape that lacks the opening brace.
+ UnicodeEscapeWithoutBrace,
+
+ /// In a `\u{...}` escape, a non-hex digit and non-underscore character was
+ /// found.
+ NonHexDigitInUnicodeEscape,
+
+ /// More than 6 digits found in unicode escape.
+ TooManyDigitInUnicodeEscape,
+
+ /// The value from a unicode escape does not represent a valid character.
+ InvalidUnicodeEscapeChar,
+
+ /// A `\u{..` escape that is not terminated (lacks the closing brace).
+ UnterminatedUnicodeEscape,
+
+ /// A character literal that's not terminated.
+ UnterminatedCharLiteral,
+
+ /// A character literal that contains more than one character.
+ OverlongCharLiteral,
+
+ /// An empty character literal, i.e. `''`.
+ EmptyCharLiteral,
+
+ UnterminatedByteLiteral,
+ OverlongByteLiteral,
+ EmptyByteLiteral,
+ NonAsciiInByteLiteral,
+
+ /// A `'` character was not escaped in a character or byte literal, or a `"`
+ /// character was not escaped in a string or byte string literal.
+ UnescapedSingleQuote,
+
+ /// A \n, \t or \r raw character in a char or byte literal.
+ UnescapedSpecialWhitespace,
+
+ /// When parsing a character, byte, string or byte string literal directly
+ /// and the input does not start with the corresponding quote character
+ /// (plus optional raw string prefix).
+ DoesNotStartWithQuote,
+
+ /// Unterminated raw string literal.
+ UnterminatedRawString,
+
+ /// String literal without a `"` at the end.
+ UnterminatedString,
+
+ /// Invalid start for a string literal.
+ InvalidStringLiteralStart,
+
+ /// Invalid start for a byte literal.
+ InvalidByteLiteralStart,
+
+ InvalidByteStringLiteralStart,
+
+ /// A literal `\r` character not followed by a `\n` character in a
+ /// (raw) string or byte string literal.
+ IsolatedCr,
+}
+
+impl std::error::Error for ParseError {}
+
+/// Formats the error as a fixed description of its kind, followed by
+/// ` (at start..end)` when a span is available.
+impl fmt::Display for ParseError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use ParseErrorKind::*;
+
+        // One static message per error kind; the match is exhaustive so a new
+        // kind cannot be added without also giving it a description.
+        let description = match self.kind {
+            Empty => "input is empty",
+            UnexpectedChar => "unexpected character",
+            InvalidLiteral => "invalid literal",
+            DoesNotStartWithDigit => "number literal does not start with decimal digit",
+            InvalidDigit => "integer literal contains a digit invalid for its base",
+            NoDigits => "integer literal does not contain any digits",
+            InvalidIntegerTypeSuffix => "invalid integer type suffix",
+            InvalidFloatTypeSuffix => "invalid floating point type suffix",
+            NoExponentDigits => "exponent of floating point literal does not contain any digits",
+            UnknownEscape => "unknown escape",
+            UnterminatedEscape => "unterminated escape: input ended too soon",
+            InvalidXEscape => r"invalid `\x` escape: not followed by two hex digits",
+            NonAsciiXEscape => r"`\x` escape in char/string literal exceeds ASCII range",
+            UnicodeEscapeInByteLiteral => r"`\u{...}` escape in byte (string) literal not allowed",
+            InvalidStartOfUnicodeEscape => r"invalid start of `\u{...}` escape",
+            UnicodeEscapeWithoutBrace => r"Unicode `\u{...}` escape without opening brace",
+            NonHexDigitInUnicodeEscape => r"non-hex digit found in `\u{...}` escape",
+            TooManyDigitInUnicodeEscape => r"more than six digits in `\u{...}` escape",
+            InvalidUnicodeEscapeChar => r"value specified in `\u{...}` escape is not a valid char",
+            UnterminatedUnicodeEscape => r"unterminated `\u{...}` escape",
+            UnterminatedCharLiteral => "character literal is not terminated",
+            OverlongCharLiteral => "character literal contains more than one character",
+            EmptyCharLiteral => "empty character literal",
+            UnterminatedByteLiteral => "byte literal is not terminated",
+            OverlongByteLiteral => "byte literal contains more than one byte",
+            EmptyByteLiteral => "empty byte literal",
+            NonAsciiInByteLiteral => "non ASCII character in byte (string) literal",
+            // This kind is reported for byte literals as well, so the message
+            // must not claim the input was a character literal.
+            UnescapedSingleQuote => "character or byte literal contains unescaped ' character",
+            UnescapedSpecialWhitespace => r"unescaped newline (\n), tab (\t) or cr (\r) character",
+            DoesNotStartWithQuote => "invalid start for char/byte/string literal",
+            UnterminatedRawString => "unterminated raw (byte) string literal",
+            UnterminatedString => "unterminated (byte) string literal",
+            InvalidStringLiteralStart => "invalid start for string literal",
+            InvalidByteLiteralStart => "invalid start for byte literal",
+            InvalidByteStringLiteralStart => "invalid start for byte string literal",
+            IsolatedCr => r"`\r` not immediately followed by `\n` in string",
+        };
+
+        description.fmt(f)?;
+        if let Some(span) = &self.span {
+            write!(f, " (at {}..{})", span.start, span.end)?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/escape.rs b/src/escape.rs
new file mode 100644
index 0000000..3e93113
--- /dev/null
+++ b/src/escape.rs
@@ -0,0 +1,255 @@
+use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::hex_digit_value};
+
+
+/// Parses the single escape sequence at the very start of `input`.
+///
+/// `input` must start with `\`. `offset` is the position of that backslash
+/// inside the complete literal; it is only used to compute error spans.
+///
+/// On success, returns the unescaped value together with the number of bytes
+/// of `input` that the escape sequence occupies.
+///
+/// # Errors
+///
+/// Returns an error if `input` ends right after the backslash, if the escape
+/// is unknown or malformed, if a `\x` escape is larger than `0x7F` while
+/// `E::SUPPORTS_UNICODE` is `true`, or if a `\u{...}` escape is used while
+/// `E::SUPPORTS_UNICODE` is `false`. All spans are positions in the complete
+/// literal, i.e. they include `offset`.
+pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> {
+    // The byte after the backslash selects the kind of escape.
+    let first = input.as_bytes().get(1)
+        .ok_or(perr(offset, UnterminatedEscape))?;
+    let out = match first {
+        // Quote escapes
+        b'\'' => (E::from_byte(b'\''), 2),
+        b'"' => (E::from_byte(b'"'), 2),
+
+        // Ascii escapes
+        b'n' => (E::from_byte(b'\n'), 2),
+        b'r' => (E::from_byte(b'\r'), 2),
+        b't' => (E::from_byte(b'\t'), 2),
+        b'\\' => (E::from_byte(b'\\'), 2),
+        b'0' => (E::from_byte(b'\0'), 2),
+        b'x' => {
+            // `get` yields `None` if fewer than two bytes follow the `x` or
+            // if index 4 is not a char boundary.
+            let hex_string = input.get(2..4)
+                .ok_or(perr(offset..offset + input.len(), UnterminatedEscape))?
+                .as_bytes();
+            let first = hex_digit_value(hex_string[0])
+                .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
+            let second = hex_digit_value(hex_string[1])
+                .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
+            let value = second + 16 * first;
+
+            // Where Unicode escapes are available, `\x` is limited to ASCII.
+            if E::SUPPORTS_UNICODE && value > 0x7F {
+                return Err(perr(offset..offset + 4, NonAsciiXEscape));
+            }
+
+            (E::from_byte(value), 4)
+        },
+
+        // Unicode escape
+        b'u' => {
+            if !E::SUPPORTS_UNICODE {
+                return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
+            }
+
+            if input.as_bytes().get(2) != Some(&b'{') {
+                return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
+            }
+
+            // Position of the closing brace, relative to the start of `input`.
+            let closing_pos = input.bytes().position(|b| b == b'}')
+                .ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;
+
+            let inner = &input[3..closing_pos];
+            if inner.as_bytes().first() == Some(&b'_') {
+                // `inner` starts three bytes after the backslash (`\u{`), so the
+                // offending underscore sits at `offset + 3` in the literal.
+                return Err(perr(offset + 3, InvalidStartOfUnicodeEscape));
+            }
+
+            let mut v: u32 = 0;
+            let mut digit_count = 0;
+            for (i, b) in inner.bytes().enumerate() {
+                // Underscores are skipped and do not count as digits.
+                if b == b'_' {
+                    continue;
+                }
+
+                let digit = hex_digit_value(b)
+                    .ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;
+
+                if digit_count == 6 {
+                    return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
+                }
+                digit_count += 1;
+                v = 16 * v + digit as u32;
+            }
+
+            // `closing_pos` is relative to `input`, so `offset` has to be added
+            // to get the end of the escape (including `}`) in the literal.
+            let c = std::char::from_u32(v)
+                .ok_or(perr(offset..offset + closing_pos + 1, InvalidUnicodeEscapeChar))?;
+
+            (E::from_char(c), closing_pos + 1)
+        }
+
+        _ => return Err(perr(offset..offset + 2, UnknownEscape)),
+    };
+
+    Ok(out)
+}
+
+/// A value that an escape sequence can be unescaped to. Implemented in this
+/// file for `u8` and `char`.
+pub(crate) trait Escapee: Into<char> {
+ /// Whether `\u{...}` escapes are allowed for this type. `unescape` also
+ /// rejects `\x` escapes above `0x7F` when this is `true`.
+ const SUPPORTS_UNICODE: bool;
+ /// Creates the value from a single byte.
+ fn from_byte(b: u8) -> Self;
+ /// Creates the value from a `char`.
+ fn from_char(c: char) -> Self;
+}
+
+/// Bytes do not support `\u{...}` escapes.
+impl Escapee for u8 {
+ const SUPPORTS_UNICODE: bool = false;
+ fn from_byte(b: u8) -> Self {
+ b
+ }
+ /// Always panics: `unescape` returns an error for `\u` before calling
+ /// `from_char` when `SUPPORTS_UNICODE` is `false`, so a call here is a bug.
+ fn from_char(_: char) -> Self {
+ panic!("bug: `<u8 as Escapee>::from_char` was called");
+ }
+}
+
+/// Chars support `\u{...}` escapes.
+impl Escapee for char {
+ const SUPPORTS_UNICODE: bool = true;
+ /// Converts the byte via `u8 -> char` (`b.into()`).
+ fn from_byte(b: u8) -> Self {
+ b.into()
+ }
+ fn from_char(c: char) -> Self {
+ c
+ }
+}
+
+/// Returns `true` if the byte is whitespace that gets skipped after the start
+/// of a "string continue" (an unescaped backslash followed by `\n`): space,
+/// tab, line feed or carriage return.
+pub(crate) fn is_string_continue_skipable_whitespace(b: u8) -> bool {
+    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
+}
+
+/// Unescapes a whole string or byte string.
+///
+/// Scanning starts at byte index `offset` of `input` and stops before the last
+/// byte, which has to be the closing `"`. (Presumably `input` is the complete
+/// literal and `offset` points just behind the opening quote; the callers are
+/// not part of this function.)
+///
+/// Returns `Ok(None)` if the scanned part contains no escape, string continue
+/// or `\r\n` sequence, i.e. if the string value is exactly the corresponding
+/// part of `input`. Otherwise returns `Ok(Some(value))` with the unescaped
+/// value.
+///
+/// # Errors
+///
+/// Returns an error for invalid escapes, a `\r` that is not followed by `\n`,
+/// an unescaped `"` before the end, non-ASCII bytes if `E` does not support
+/// Unicode, and for a missing closing quote.
+pub(crate) fn unescape_string<E: Escapee>(
+    input: &str,
+    offset: usize,
+) -> Result<Option<String>, ParseError> {
+    let mut i = offset;
+    // End of the most recently handled escape; everything between this and `i`
+    // is plain text that still has to be copied into `value`.
+    let mut end_last_escape = offset;
+    let mut value = String::new();
+    while i < input.len() - 1 {
+        match input.as_bytes()[i] {
+            // Handle "string continue".
+            b'\\' if input.as_bytes()[i + 1] == b'\n' => {
+                value.push_str(&input[end_last_escape..i]);
+
+                // Find the first non-whitespace character.
+                let end_escape = input[i + 2..].bytes()
+                    .position(|b| !is_string_continue_skipable_whitespace(b))
+                    .ok_or(perr(None, UnterminatedString))?;
+
+                i += 2 + end_escape;
+                end_last_escape = i;
+            }
+            b'\\' => {
+                // The closing quote is excluded so it cannot become part of
+                // the escape.
+                let (c, len) = unescape::<E>(&input[i..input.len() - 1], i)?;
+                value.push_str(&input[end_last_escape..i]);
+                value.push(c.into());
+                i += len;
+                end_last_escape = i;
+            }
+            b'\r' => {
+                if input.as_bytes()[i + 1] == b'\n' {
+                    // `\r\n` is normalized to `\n`.
+                    value.push_str(&input[end_last_escape..i]);
+                    value.push('\n');
+                    i += 2;
+                    end_last_escape = i;
+                } else {
+                    return Err(perr(i, IsolatedCr))
+                }
+            }
+            b'"' => return Err(perr(i + 1..input.len(), UnexpectedChar)),
+            b if !E::SUPPORTS_UNICODE && !b.is_ascii()
+                => return Err(perr(i, NonAsciiInByteLiteral)),
+            _ => i += 1,
+        }
+    }
+
+    if input.as_bytes()[input.len() - 1] != b'"' || input.len() == offset {
+        return Err(perr(None, UnterminatedString));
+    }
+
+    // Every handled escape advances `end_last_escape` past `offset`, so it
+    // still equals `offset` only if nothing was escaped. Checking whether
+    // `value` is empty is not sufficient: a string continue at the very start
+    // pushes nothing into `value`, yet the value differs from the input.
+    let value = if end_last_escape == offset {
+        None
+    } else {
+        // There was an escape in the string, so we need to push the
+        // remaining unescaped part of the string still.
+        value.push_str(&input[end_last_escape..input.len() - 1]);
+        Some(value)
+    };
+
+    Ok(value)
+}
+
+/// Scans and validates a raw (byte) string literal, turning every `\r\n` into
+/// a plain `\n`.
+///
+/// Scanning starts at `offset`, where the `#`s (if any) and the opening `"`
+/// are expected. Returns the converted string value (`None` if the input
+/// contained no `\r\n`, i.e. if no conversion was necessary) and the number of
+/// hashes used by the literal.
+pub(crate) fn scan_raw_string<E: Escapee>(
+    input: &str,
+    offset: usize,
+) -> Result<(Option<String>, u32), ParseError> {
+    let bytes = input.as_bytes();
+
+    // Leading hashes, followed by the opening quote.
+    let num_hashes = input[offset..].bytes().position(|b| b != b'#')
+        .ok_or(perr(None, InvalidLiteral))?;
+    if bytes.get(offset + num_hashes) != Some(&b'"') {
+        return Err(perr(None, InvalidLiteral));
+    }
+    let hashes = &input[offset..offset + num_hashes];
+    let start_inner = offset + num_hashes + 1;
+
+    let mut converted = String::new();
+    let mut chunk_start = start_inner;
+    let mut closing_quote_pos = None;
+    let mut pos = start_inner;
+    while pos < bytes.len() {
+        match bytes[pos] {
+            // A quote followed by the right number of hashes ends the literal.
+            b'"' if input[pos + 1..].starts_with(hashes) => {
+                closing_quote_pos = Some(pos);
+                break;
+            }
+
+            // Convert `\r\n` into `\n`. This is currently not well documented
+            // in the Rust reference, but is done even for raw strings. That's
+            // because rustc simply converts all line endings when reading
+            // source files.
+            b'\r' if bytes.get(pos + 1) == Some(&b'\n') => {
+                converted.push_str(&input[chunk_start..pos]);
+                converted.push('\n');
+                pos += 2;
+                chunk_start = pos;
+            }
+
+            // A `\r` without `\n` is an error in raw strings, but not in raw
+            // byte strings.
+            b'\r' if E::SUPPORTS_UNICODE => return Err(perr(pos, IsolatedCr)),
+
+            b if !E::SUPPORTS_UNICODE && !b.is_ascii() => {
+                return Err(perr(pos, NonAsciiInByteLiteral));
+            }
+
+            _ => pos += 1,
+        }
+    }
+
+    let closing_quote_pos = closing_quote_pos
+        .ok_or(perr(None, UnterminatedRawString))?;
+
+    // Nothing may follow the closing hashes.
+    if closing_quote_pos + num_hashes != input.len() - 1 {
+        return Err(perr(closing_quote_pos + num_hashes + 1..input.len(), UnexpectedChar));
+    }
+
+    // `converted` is only empty if no `\r\n` was found. In that case the
+    // string value equals the input, so `None` is returned for it.
+    if converted.is_empty() {
+        return Ok((None, num_hashes as u32));
+    }
+
+    // Append the part after the last `\r\n`.
+    converted.push_str(&input[chunk_start..closing_quote_pos]);
+    Ok((Some(converted), num_hashes as u32))
+}
diff --git a/src/float/mod.rs b/src/float/mod.rs
new file mode 100644
index 0000000..e30a336
--- /dev/null
+++ b/src/float/mod.rs
@@ -0,0 +1,202 @@
+use std::fmt;
+
+use crate::{
+ Buffer, ParseError,
+ err::{perr, ParseErrorKind::*},
+ parse::{end_dec_digits, first_byte_or_empty},
+};
+
+
+
+/// A floating point literal, e.g. `3.14`, `8.`, `135e12`, `27f32` or `1.956e2f64`.
+///
+/// This kind of literal has several forms, but generally consists of a main
+/// number part, an optional exponent and an optional type suffix. See
+/// [the reference][ref] for more information.
+///
+/// A leading minus sign `-` is not part of the literal grammar! `-3.14` are two
+/// tokens in the Rust grammar.
+///
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct FloatLit<B: Buffer> {
+ /// Basically the whole literal, but without the type suffix. Other `usize`
+ /// fields in this struct partition this string. `end_integer_part` is
+ /// always <= `end_fractional_part`.
+ ///
+ /// ```text
+ /// 12_3.4_56e789
+ ///     ╷    ╷
+ ///     |    └ end_fractional_part = 9
+ ///     └ end_integer_part = 4
+ ///
+ /// 246.
+ ///    ╷╷
+ ///    |└ end_fractional_part = 4
+ ///    └ end_integer_part = 3
+ ///
+ /// 1234e89
+ ///     ╷
+ ///     |
+ ///     └ end_integer_part = end_fractional_part = 4
+ /// ```
+ number_part: B,
+
+ /// The first index not part of the integer part anymore. Since the integer
+ /// part is at the start, this is also the length of that part.
+ end_integer_part: usize,
+
+ /// The first index after the fractional part. Equal to `end_integer_part`
+ /// if the literal has no period. The exponent (if any) starts here.
+ end_fractional_part: usize,
+
+ /// Optional type suffix.
+ type_suffix: Option<FloatType>,
+}
+
+/// All possible float type suffixes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum FloatType {
+ /// The `f32` suffix.
+ F32,
+ /// The `f64` suffix.
+ F64,
+}
+
+impl<B: Buffer> FloatLit<B> {
+    /// Parses the input as a floating point literal. Returns an error if the
+    /// input is invalid or represents a different kind of literal.
+    pub fn parse(s: B) -> Result<Self, ParseError> {
+        let first = first_byte_or_empty(&s)?;
+        if matches!(first, b'0'..=b'9') {
+            Self::parse_impl(s)
+        } else {
+            Err(perr(0, DoesNotStartWithDigit))
+        }
+    }
+
+    /// Returns the whole number part (integer part, fractional part and
+    /// exponent), but without the type suffix. To get an actual floating
+    /// point value, this string has to be parsed, e.g. with `f32::from_str`
+    /// or an external crate.
+    pub fn number_part(&self) -> &str {
+        &self.number_part
+    }
+
+    /// Returns the non-empty integer part of this literal.
+    pub fn integer_part(&self) -> &str {
+        &self.number_part()[..self.end_integer_part]
+    }
+
+    /// Returns the optional fractional part of this literal, without the
+    /// period. `Some` is returned iff the input contains a period. Note that
+    /// `Some("")` is possible, e.g. for `3.`.
+    pub fn fractional_part(&self) -> Option<&str> {
+        if self.end_integer_part == self.end_fractional_part {
+            return None;
+        }
+
+        // Skip the period itself.
+        let start = self.end_integer_part + 1;
+        Some(&self.number_part()[start..self.end_fractional_part])
+    }
+
+    /// Returns the exponent part including the leading `e` or `E`. Empty if
+    /// the input has no exponent.
+    pub fn exponent_part(&self) -> &str {
+        &self.number_part()[self.end_fractional_part..]
+    }
+
+    /// The optional type suffix.
+    pub fn type_suffix(&self) -> Option<FloatType> {
+        self.type_suffix
+    }
+
+    /// Precondition: first byte of string has to be in `b'0'..=b'9'`.
+    pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
+        // Integer part.
+        let end_integer_part = end_dec_digits(&input);
+        let after_integer = &input[end_integer_part..];
+
+        // Fractional part: a period, optionally followed by digits. The
+        // digits must not start with `_`.
+        let end_fractional_part = match after_integer.as_bytes() {
+            [b'.', b'_', ..] => return Err(perr(end_integer_part + 1, UnexpectedChar)),
+            [b'.', ..] => end_dec_digits(&after_integer[1..]) + 1 + end_integer_part,
+            _ => end_integer_part,
+        };
+        let after_fraction = &input[end_fractional_part..];
+
+        // A period that is not followed by decimal digits has to be the end
+        // of the literal.
+        let bare_period = end_integer_part + 1 == end_fractional_part;
+        if bare_period && !after_fraction.is_empty() {
+            return Err(perr(end_integer_part + 1, UnexpectedChar));
+        }
+
+        // Optional exponent.
+        let end_number_part = match after_fraction.as_bytes().first() {
+            Some(b'e') | Some(b'E') => {
+                // A single `-` or `+` may precede the exponent digits.
+                let has_sign = matches!(
+                    after_fraction.as_bytes().get(1),
+                    Some(b'-') | Some(b'+')
+                );
+                let digits_start = if has_sign { 2 } else { 1 };
+
+                // The exponent needs at least one actual digit (underscores
+                // alone are not enough).
+                let end_exponent = end_dec_digits(&after_fraction[digits_start..]) + digits_start;
+                let has_digit = after_fraction[digits_start..end_exponent]
+                    .bytes()
+                    .any(|b| matches!(b, b'0'..=b'9'));
+                if !has_digit {
+                    return Err(perr(
+                        end_fractional_part..end_fractional_part + end_exponent,
+                        NoExponentDigits,
+                    ));
+                }
+
+                end_exponent + end_fractional_part
+            }
+            _ => end_fractional_part,
+        };
+
+        // Whatever remains has to be a valid type suffix (or nothing).
+        let type_suffix = match &input[end_number_part..] {
+            "" => None,
+            "f32" => Some(FloatType::F32),
+            "f64" => Some(FloatType::F64),
+            _ => return Err(perr(end_number_part..input.len(), InvalidFloatTypeSuffix)),
+        };
+
+        Ok(Self {
+            number_part: input.cut(0..end_number_part),
+            end_integer_part,
+            end_fractional_part,
+            type_suffix,
+        })
+    }
+}
+
+impl FloatLit<&str> {
+    /// Returns the owned version of `Self`, copying the underlying buffer
+    /// into a new `String`.
+    pub fn to_owned(&self) -> FloatLit<String> {
+        let Self { number_part, end_integer_part, end_fractional_part, type_suffix } = *self;
+        FloatLit {
+            number_part: String::from(number_part),
+            end_integer_part,
+            end_fractional_part,
+            type_suffix,
+        }
+    }
+}
+
+impl<B: Buffer> fmt::Display for FloatLit<B> {
+    /// Writes the number part immediately followed by the type suffix (if
+    /// there is one).
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let suffix = self.type_suffix.map_or("", |ty| match ty {
+            FloatType::F32 => "f32",
+            FloatType::F64 => "f64",
+        });
+
+        f.write_str(self.number_part())?;
+        f.write_str(suffix)
+    }
+}
+
+
+#[cfg(test)]
+mod tests;
diff --git a/src/float/tests.rs b/src/float/tests.rs
new file mode 100644
index 0000000..44f734f
--- /dev/null
+++ b/src/float/tests.rs
@@ -0,0 +1,205 @@
+use crate::{
+ Literal, ParseError,
+ test_util::assert_parse_ok_eq,
+};
+use super::{FloatLit, FloatType};
+
+
+// ===== Utility functions =======================================================================
+
+/// Helper macro to check parsing a float.
+///
+/// Takes the integer part, fractional part (including the period) and
+/// exponent part as string literals, followed by the type suffix (`f32`,
+/// `f64`, or `-` for none). The parts are concatenated to form the input, and
+/// the expected `FloatLit` is built from the lengths of the parts.
+///
+/// This macro contains quite a bit of logic itself (which can be buggy of
+/// course), so we have a few test functions below to test a bunch of cases
+/// manually.
+macro_rules! check {
+ ($intpart:literal $fracpart:literal $exppart:literal $suffix:tt) => {
+ let input = concat!($intpart, $fracpart, $exppart, check!(@stringify_suffix $suffix));
+ let expected_float = FloatLit {
+ number_part: concat!($intpart, $fracpart, $exppart),
+ end_integer_part: $intpart.len(),
+ end_fractional_part: $intpart.len() + $fracpart.len(),
+ type_suffix: check!(@ty $suffix),
+ };
+
+ // The input has to parse to the expected value both via the specific
+ // parser and via the general `Literal` parser.
+ assert_parse_ok_eq(
+ input, FloatLit::parse(input), expected_float.clone(), "FloatLit::parse");
+ assert_parse_ok_eq(
+ input, Literal::parse(input), Literal::Float(expected_float), "Literal::parse");
+
+ };
+ // Internal rules: map the suffix token to the expected `type_suffix`.
+ (@ty f32) => { Some(FloatType::F32) };
+ (@ty f64) => { Some(FloatType::F64) };
+ (@ty -) => { None };
+ // Internal rules: map the suffix token to the string appended to the input.
+ (@stringify_suffix -) => { "" };
+ (@stringify_suffix $suffix:ident) => { stringify!($suffix) };
+}
+
+
+// ===== Actual tests ===========================================================================
+
+// Checks all accessors by hand (without the `check!` macro) for literals that
+// have no type suffix.
+#[test]
+fn manual_without_suffix() -> Result<(), ParseError> {
+ let f = FloatLit::parse("3.14")?;
+ assert_eq!(f.number_part(), "3.14");
+ assert_eq!(f.integer_part(), "3");
+ assert_eq!(f.fractional_part(), Some("14"));
+ assert_eq!(f.exponent_part(), "");
+ assert_eq!(f.type_suffix(), None);
+
+ // Period without digits: the fractional part exists, but is empty.
+ let f = FloatLit::parse("9.")?;
+ assert_eq!(f.number_part(), "9.");
+ assert_eq!(f.integer_part(), "9");
+ assert_eq!(f.fractional_part(), Some(""));
+ assert_eq!(f.exponent_part(), "");
+ assert_eq!(f.type_suffix(), None);
+
+ // Exponent without period: no fractional part at all.
+ let f = FloatLit::parse("8e1")?;
+ assert_eq!(f.number_part(), "8e1");
+ assert_eq!(f.integer_part(), "8");
+ assert_eq!(f.fractional_part(), None);
+ assert_eq!(f.exponent_part(), "e1");
+ assert_eq!(f.type_suffix(), None);
+
+ let f = FloatLit::parse("8E3")?;
+ assert_eq!(f.number_part(), "8E3");
+ assert_eq!(f.integer_part(), "8");
+ assert_eq!(f.fractional_part(), None);
+ assert_eq!(f.exponent_part(), "E3");
+ assert_eq!(f.type_suffix(), None);
+
+ // Underscores are kept in all parts.
+ let f = FloatLit::parse("8_7_6.1_23e15")?;
+ assert_eq!(f.number_part(), "8_7_6.1_23e15");
+ assert_eq!(f.integer_part(), "8_7_6");
+ assert_eq!(f.fractional_part(), Some("1_23"));
+ assert_eq!(f.exponent_part(), "e15");
+ assert_eq!(f.type_suffix(), None);
+
+ let f = FloatLit::parse("8.2e-_04_9")?;
+ assert_eq!(f.number_part(), "8.2e-_04_9");
+ assert_eq!(f.integer_part(), "8");
+ assert_eq!(f.fractional_part(), Some("2"));
+ assert_eq!(f.exponent_part(), "e-_04_9");
+ assert_eq!(f.type_suffix(), None);
+
+ Ok(())
+}
+
+// Checks all accessors by hand for literals with a type suffix. The suffix
+// must not show up in `number_part()` or `exponent_part()`.
+#[test]
+fn manual_with_suffix() -> Result<(), ParseError> {
+ let f = FloatLit::parse("3.14f32")?;
+ assert_eq!(f.number_part(), "3.14");
+ assert_eq!(f.integer_part(), "3");
+ assert_eq!(f.fractional_part(), Some("14"));
+ assert_eq!(f.exponent_part(), "");
+ assert_eq!(f.type_suffix(), Some(FloatType::F32));
+
+ let f = FloatLit::parse("8e1f64")?;
+ assert_eq!(f.number_part(), "8e1");
+ assert_eq!(f.integer_part(), "8");
+ assert_eq!(f.fractional_part(), None);
+ assert_eq!(f.exponent_part(), "e1");
+ assert_eq!(f.type_suffix(), Some(FloatType::F64));
+
+ let f = FloatLit::parse("8_7_6.1_23e15f32")?;
+ assert_eq!(f.number_part(), "8_7_6.1_23e15");
+ assert_eq!(f.integer_part(), "8_7_6");
+ assert_eq!(f.fractional_part(), Some("1_23"));
+ assert_eq!(f.exponent_part(), "e15");
+ assert_eq!(f.type_suffix(), Some(FloatType::F32));
+
+ let f = FloatLit::parse("8.2e-_04_9f64")?;
+ assert_eq!(f.number_part(), "8.2e-_04_9");
+ assert_eq!(f.integer_part(), "8");
+ assert_eq!(f.fractional_part(), Some("2"));
+ assert_eq!(f.exponent_part(), "e-_04_9");
+ assert_eq!(f.type_suffix(), Some(FloatType::F64));
+
+ Ok(())
+}
+
+// Valid literals, checked via the `check!` macro. Arguments are: integer part,
+// fractional part (with period), exponent part and type suffix (`-` = none).
+#[test]
+fn simple() {
+ check!("3" ".14" "" -);
+ check!("3" ".14" "" f32);
+ check!("3" ".14" "" f64);
+
+ check!("3" "" "" f32);
+ check!("3" "" "e987654321" -);
+ check!("3" "" "e987654321" f64);
+
+ check!("42_888" ".05" "" -);
+ check!("42_888" ".05" "E5___" f32);
+ check!("123456789" "" "e_1" f64);
+ check!("123456789" ".99" "e_1" f64);
+ check!("123456789" ".99" "" f64);
+ check!("123456789" ".99" "" -);
+
+ check!("147" ".3_33" "" -);
+ check!("147" ".3_33__" "E3" f64);
+ check!("147" ".3_33__" "" f32);
+
+ // Signed exponents.
+ check!("147" ".333" "e-10" -);
+ check!("147" ".333" "e-_7" f32);
+ check!("147" ".333" "e+10" -);
+ check!("147" ".333" "e+_7" f32);
+
+ // Period without fractional digits.
+ check!("86" "." "" -);
+ check!("0" "." "" -);
+ check!("0_" "." "" -);
+ check!("0" ".0000001" "" -);
+ check!("0" ".000_0001" "" -);
+
+ check!("0" ".0" "e+0" -);
+ check!("0" "" "E+0" -);
+ check!("34" "" "e+0" -);
+ check!("0" ".9182" "E+0" f32);
+}
+
+// Invalid inputs: checks the reported error kind and span. The last macro
+// argument is the expected span (a single position or a range).
+#[test]
+fn parse_err() {
+ // Inputs that do not start with a digit.
+ assert_err!(FloatLit, "", Empty, None);
+ assert_err_single!(FloatLit::parse("."), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("+"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("-"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("e"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("e8"), DoesNotStartWithDigit, 0);
+ assert_err!(FloatLit, "0e", NoExponentDigits, 1..2);
+ assert_err_single!(FloatLit::parse("f32"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("foo"), DoesNotStartWithDigit, 0);
+
+ assert_err_single!(FloatLit::parse("inf"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("nan"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("NaN"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("NAN"), DoesNotStartWithDigit, 0);
+
+ // Malformed fractional part, exponent or type suffix.
+ assert_err_single!(FloatLit::parse("_2.7"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse(".5"), DoesNotStartWithDigit, 0);
+ assert_err_single!(FloatLit::parse("0x44.5"), InvalidFloatTypeSuffix, 1..6);
+ assert_err!(FloatLit, "1e", NoExponentDigits, 1..2);
+ assert_err!(FloatLit, "1.e4", UnexpectedChar, 2);
+ assert_err!(FloatLit, "3._4", UnexpectedChar, 2);
+ assert_err!(FloatLit, "12345._987", UnexpectedChar, 6);
+ assert_err!(FloatLit, "46._", UnexpectedChar, 3);
+ assert_err!(FloatLit, "46.f32", UnexpectedChar, 3);
+ assert_err!(FloatLit, "46.e3", UnexpectedChar, 3);
+ assert_err!(FloatLit, "46._e3", UnexpectedChar, 3);
+ assert_err!(FloatLit, "46.e3f64", UnexpectedChar, 3);
+ assert_err!(FloatLit, "23.4e_", NoExponentDigits, 4..6);
+ assert_err!(FloatLit, "23E___f32", NoExponentDigits, 2..6);
+ assert_err!(FloatLit, "7f23", InvalidFloatTypeSuffix, 1..4);
+ assert_err!(FloatLit, "7f320", InvalidFloatTypeSuffix, 1..5);
+ assert_err!(FloatLit, "7f64_", InvalidFloatTypeSuffix, 1..5);
+ assert_err!(FloatLit, "8f649", InvalidFloatTypeSuffix, 1..5);
+ assert_err!(FloatLit, "8f64f32", InvalidFloatTypeSuffix, 1..7);
+ assert_err!(FloatLit, "55e3.1", InvalidFloatTypeSuffix, 4..6); // suboptimal
+
+ // Signs are only allowed directly after the `e`/`E` of an exponent.
+ assert_err!(FloatLit, "3.7+", InvalidFloatTypeSuffix, 3..4);
+ assert_err!(FloatLit, "3.7+2", InvalidFloatTypeSuffix, 3..5);
+ assert_err!(FloatLit, "3.7-", InvalidFloatTypeSuffix, 3..4);
+ assert_err!(FloatLit, "3.7-2", InvalidFloatTypeSuffix, 3..5);
+ assert_err!(FloatLit, "3.7e+", NoExponentDigits, 3..5);
+ assert_err!(FloatLit, "3.7e-", NoExponentDigits, 3..5);
+ assert_err!(FloatLit, "3.7e-+3", NoExponentDigits, 3..5); // suboptimal
+ assert_err!(FloatLit, "3.7e+-3", NoExponentDigits, 3..5); // suboptimal
+}
diff --git a/src/impls.rs b/src/impls.rs
new file mode 100644
index 0000000..251f350
--- /dev/null
+++ b/src/impls.rs
@@ -0,0 +1,339 @@
+use std::convert::TryFrom;
+
+use crate::{Literal, err::{InvalidToken, TokenKind}};
+
+
+/// Helper macro to call a `callback` macro four times for all combinations of
+/// `proc_macro`/`proc_macro2` and `&`/owned.
+///
+/// The callback receives the path prefix in brackets (e.g. `[&proc_macro::]`),
+/// followed by `=>` and the remaining input tokens. The two `proc_macro2`
+/// invocations are only emitted if the `proc-macro2` feature is enabled.
+macro_rules! helper {
+ ($callback:ident, $($input:tt)*) => {
+ $callback!([proc_macro::] => $($input)*);
+ $callback!([&proc_macro::] => $($input)*);
+ #[cfg(feature = "proc-macro2")]
+ $callback!([proc_macro2::] => $($input)*);
+ #[cfg(feature = "proc-macro2")]
+ $callback!([&proc_macro2::] => $($input)*);
+ };
+}
+
+
+// ==============================================================================================
+// ===== `From<*Lit> for Literal`
+// ==============================================================================================
+
+// Implements `From<$ty> for Literal<B>` by wrapping the value in the given
+// `Literal` variant. The generic parameter `B` of the generated impl may be
+// used inside `$ty`.
+macro_rules! impl_specific_lit_to_lit {
+ ($ty:ty, $variant:ident) => {
+ impl<B: crate::Buffer> From<$ty> for Literal<B> {
+ fn from(src: $ty) -> Self {
+ Literal::$variant(src)
+ }
+ }
+ };
+}
+
+// One impl per literal kind. `BoolLit` is the only type without a buffer
+// parameter.
+impl_specific_lit_to_lit!(crate::BoolLit, Bool);
+impl_specific_lit_to_lit!(crate::IntegerLit<B>, Integer);
+impl_specific_lit_to_lit!(crate::FloatLit<B>, Float);
+impl_specific_lit_to_lit!(crate::CharLit<B>, Char);
+impl_specific_lit_to_lit!(crate::StringLit<B>, String);
+impl_specific_lit_to_lit!(crate::ByteLit<B>, Byte);
+impl_specific_lit_to_lit!(crate::ByteStringLit<B>, ByteString);
+
+
+
+// ==============================================================================================
+// ===== `From<pm::Literal> for Literal`
+// ==============================================================================================
+
+
+// Implements `From<pm::Literal> for Literal<String>` for the path prefix
+// passed in by `helper!`. The conversion goes through the token's string
+// representation and panics if that string cannot be parsed.
+macro_rules! impl_tt_to_lit {
+ ([$($prefix:tt)*] => ) => {
+ impl From<$($prefix)* Literal> for Literal<String> {
+ fn from(src: $($prefix)* Literal) -> Self {
+ // We call `expect` in all these impls: this library aims to implement exactly
+ // the Rust grammar, so if we have a valid Rust literal, we should always be
+ // able to parse it.
+ Self::parse(src.to_string())
+ .expect("bug: failed to parse output of `Literal::to_string`")
+ }
+ }
+ }
+}
+
+helper!(impl_tt_to_lit, );
+
+
+// ==============================================================================================
+// ===== `TryFrom<pm::TokenTree> for Literal`
+// ==============================================================================================
+
+// Implements `TryFrom<pm::TokenTree> for Literal<String>` for the path prefix
+// passed in by `helper!`. This re-uses the name of the macro defined further
+// up; from here on, `impl_tt_to_lit!` refers to this definition.
+macro_rules! impl_tt_to_lit {
+ ([$($prefix:tt)*] => ) => {
+ impl TryFrom<$($prefix)* TokenTree> for Literal<String> {
+ type Error = InvalidToken;
+ fn try_from(tt: $($prefix)* TokenTree) -> Result<Self, Self::Error> {
+ // Fetch the span first; it is needed for the error below.
+ let span = tt.span();
+ let res = match tt {
+ $($prefix)* TokenTree::Group(_) => Err(TokenKind::Group),
+ $($prefix)* TokenTree::Punct(_) => Err(TokenKind::Punct),
+ // Bool literals are represented as the identifiers `true` and `false`.
+ $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "true"
+ => return Ok(Literal::Bool(crate::BoolLit::True)),
+ $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "false"
+ => return Ok(Literal::Bool(crate::BoolLit::False)),
+ $($prefix)* TokenTree::Ident(_) => Err(TokenKind::Ident),
+ $($prefix)* TokenTree::Literal(ref lit) => Ok(lit),
+ };
+
+ match res {
+ Ok(lit) => Ok(From::from(lit)),
+ Err(actual) => Err(InvalidToken {
+ actual,
+ expected: TokenKind::Literal,
+ span: span.into(),
+ }),
+ }
+ }
+ }
+ }
+}
+
+helper!(impl_tt_to_lit, );
+
+
+// ==============================================================================================
+// ===== `TryFrom<pm::Literal> for *Lit` and `TryFrom<pm::TokenTree> for *Lit`
+// ==============================================================================================
+
+/// Returns the `TokenKind` that names the variant of the given literal.
+fn kind_of(lit: &Literal<String>) -> TokenKind {
+    match *lit {
+        Literal::Bool(_) => TokenKind::BoolLit,
+        Literal::Integer(_) => TokenKind::IntegerLit,
+        Literal::Float(_) => TokenKind::FloatLit,
+        Literal::Char(_) => TokenKind::CharLit,
+        Literal::String(_) => TokenKind::StringLit,
+        Literal::Byte(_) => TokenKind::ByteLit,
+        Literal::ByteString(_) => TokenKind::ByteStringLit,
+    }
+}
+
+// Implements `TryFrom<pm::Literal>` and `TryFrom<pm::TokenTree>` for one
+// specific literal type. Arguments after the prefix: the target type, the
+// matching `Literal` variant and the matching `TokenKind` variant (used as
+// `expected` in errors).
+macro_rules! impl_for_specific_lit {
+ ([$($prefix:tt)*] => $ty:ty, $variant:ident, $kind:ident) => {
+ impl TryFrom<$($prefix)* Literal> for $ty {
+ type Error = InvalidToken;
+ fn try_from(src: $($prefix)* Literal) -> Result<Self, Self::Error> {
+ let span = src.span();
+ // Parse as general literal first, then check for the expected kind.
+ let lit: Literal<String> = src.into();
+ match lit {
+ Literal::$variant(s) => Ok(s),
+ other => Err(InvalidToken {
+ expected: TokenKind::$kind,
+ actual: kind_of(&other),
+ span: span.into(),
+ }),
+ }
+ }
+ }
+
+ impl TryFrom<$($prefix)* TokenTree> for $ty {
+ type Error = InvalidToken;
+ fn try_from(tt: $($prefix)* TokenTree) -> Result<Self, Self::Error> {
+ let span = tt.span();
+ let res = match tt {
+ $($prefix)* TokenTree::Group(_) => Err(TokenKind::Group),
+ $($prefix)* TokenTree::Punct(_) => Err(TokenKind::Punct),
+ $($prefix)* TokenTree::Ident(_) => Err(TokenKind::Ident),
+ $($prefix)* TokenTree::Literal(ref lit) => Ok(lit),
+ };
+
+ match res {
+ // Delegate to the `TryFrom<&pm::Literal>` impl.
+ Ok(lit) => <$ty>::try_from(lit),
+ Err(actual) => Err(InvalidToken {
+ actual,
+ expected: TokenKind::$kind,
+ span: span.into(),
+ }),
+ }
+ }
+ }
+ };
+}
+
+helper!(impl_for_specific_lit, crate::IntegerLit<String>, Integer, IntegerLit);
+helper!(impl_for_specific_lit, crate::FloatLit<String>, Float, FloatLit);
+helper!(impl_for_specific_lit, crate::CharLit<String>, Char, CharLit);
+helper!(impl_for_specific_lit, crate::StringLit<String>, String, StringLit);
+helper!(impl_for_specific_lit, crate::ByteLit<String>, Byte, ByteLit);
+helper!(impl_for_specific_lit, crate::ByteStringLit<String>, ByteString, ByteStringLit);
+
+// Implements `TryFrom<pm::TokenTree> for BoolLit`. Only the identifiers `true`
+// and `false` are accepted; every other token results in an `InvalidToken`
+// error carrying the kind of the actual token.
+macro_rules! impl_from_tt_for_bool {
+ ([$($prefix:tt)*] => ) => {
+ impl TryFrom<$($prefix)* TokenTree> for crate::BoolLit {
+ type Error = InvalidToken;
+ fn try_from(tt: $($prefix)* TokenTree) -> Result<Self, Self::Error> {
+ let span = tt.span();
+ let actual = match tt {
+ $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "true"
+ => return Ok(crate::BoolLit::True),
+ $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "false"
+ => return Ok(crate::BoolLit::False),
+
+ $($prefix)* TokenTree::Group(_) => TokenKind::Group,
+ $($prefix)* TokenTree::Punct(_) => TokenKind::Punct,
+ $($prefix)* TokenTree::Ident(_) => TokenKind::Ident,
+ // The literal is parsed just to report its specific kind.
+ $($prefix)* TokenTree::Literal(ref lit) => kind_of(&Literal::from(lit)),
+ };
+
+ Err(InvalidToken {
+ actual,
+ expected: TokenKind::BoolLit,
+ span: span.into(),
+ })
+ }
+ }
+ };
+}
+
+helper!(impl_from_tt_for_bool, );
+
+
+// This module contains no items, only a doctest. The doctest is `no_run`, so
+// it merely checks that all the listed conversions for `proc_macro` types
+// compile.
+mod tests {
+ //! # Tests
+ //!
+ //! ```no_run
+ //! extern crate proc_macro;
+ //!
+ //! use std::convert::TryFrom;
+ //! use litrs::Literal;
+ //!
+ //! fn give<T>() -> T {
+ //! panic!()
+ //! }
+ //!
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::BoolLit>());
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::IntegerLit<String>>());
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::FloatLit<String>>());
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::CharLit<String>>());
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::StringLit<String>>());
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::ByteLit<String>>());
+ //! let _ = litrs::Literal::<String>::from(give::<litrs::ByteStringLit<String>>());
+ //!
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::BoolLit>());
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::IntegerLit<&'static str>>());
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::FloatLit<&'static str>>());
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::CharLit<&'static str>>());
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::StringLit<&'static str>>());
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::ByteLit<&'static str>>());
+ //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::ByteStringLit<&'static str>>());
+ //!
+ //!
+ //! let _ = litrs::Literal::from(give::<proc_macro::Literal>());
+ //! let _ = litrs::Literal::from(give::<&proc_macro::Literal>());
+ //!
+ //! let _ = litrs::Literal::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::Literal::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //!
+ //! let _ = litrs::IntegerLit::try_from(give::<proc_macro::Literal>());
+ //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::Literal>());
+ //!
+ //! let _ = litrs::FloatLit::try_from(give::<proc_macro::Literal>());
+ //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::Literal>());
+ //!
+ //! let _ = litrs::CharLit::try_from(give::<proc_macro::Literal>());
+ //! let _ = litrs::CharLit::try_from(give::<&proc_macro::Literal>());
+ //!
+ //! let _ = litrs::StringLit::try_from(give::<proc_macro::Literal>());
+ //! let _ = litrs::StringLit::try_from(give::<&proc_macro::Literal>());
+ //!
+ //! let _ = litrs::ByteLit::try_from(give::<proc_macro::Literal>());
+ //! let _ = litrs::ByteLit::try_from(give::<&proc_macro::Literal>());
+ //!
+ //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro::Literal>());
+ //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro::Literal>());
+ //!
+ //!
+ //! let _ = litrs::BoolLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::BoolLit::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //! let _ = litrs::IntegerLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //! let _ = litrs::FloatLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //! let _ = litrs::CharLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::CharLit::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //! let _ = litrs::StringLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::StringLit::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //! let _ = litrs::ByteLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::ByteLit::try_from(give::<&proc_macro::TokenTree>());
+ //!
+ //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro::TokenTree>());
+ //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro::TokenTree>());
+ //! ```
+}
+
+// Same kind of compile-only (`no_run`) doctest as in `mod tests`, but for the
+// `proc_macro2` types. Only present if the `proc-macro2` feature is enabled.
+#[cfg(feature = "proc-macro2")]
+mod tests_proc_macro2 {
+ //! # Tests
+ //!
+ //! ```no_run
+ //! extern crate proc_macro;
+ //!
+ //! use std::convert::TryFrom;
+ //! use litrs::Literal;
+ //!
+ //! fn give<T>() -> T {
+ //! panic!()
+ //! }
+ //!
+ //! let _ = litrs::Literal::from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::Literal::from(give::<&proc_macro2::Literal>());
+ //!
+ //! let _ = litrs::Literal::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::Literal::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //!
+ //! let _ = litrs::IntegerLit::try_from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro2::Literal>());
+ //!
+ //! let _ = litrs::FloatLit::try_from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::FloatLit::try_from(give::<&proc_macro2::Literal>());
+ //!
+ //! let _ = litrs::CharLit::try_from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::CharLit::try_from(give::<&proc_macro2::Literal>());
+ //!
+ //! let _ = litrs::StringLit::try_from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::StringLit::try_from(give::<&proc_macro2::Literal>());
+ //!
+ //! let _ = litrs::ByteLit::try_from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::ByteLit::try_from(give::<&proc_macro2::Literal>());
+ //!
+ //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro2::Literal>());
+ //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro2::Literal>());
+ //!
+ //!
+ //! let _ = litrs::BoolLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::BoolLit::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //! let _ = litrs::IntegerLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //! let _ = litrs::FloatLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::FloatLit::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //! let _ = litrs::CharLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::CharLit::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //! let _ = litrs::StringLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::StringLit::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //! let _ = litrs::ByteLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::ByteLit::try_from(give::<&proc_macro2::TokenTree>());
+ //!
+ //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro2::TokenTree>());
+ //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro2::TokenTree>());
+ //! ```
+}
diff --git a/src/integer/mod.rs b/src/integer/mod.rs
new file mode 100644
index 0000000..52519a6
--- /dev/null
+++ b/src/integer/mod.rs
@@ -0,0 +1,285 @@
+use std::fmt;
+
+use crate::{
+ Buffer, ParseError,
+ err::{perr, ParseErrorKind::*},
+ parse::{first_byte_or_empty, hex_digit_value},
+};
+
+
+/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
+///
+/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
+/// the main part (digits and underscores), and an optional type suffix
+/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
+///
+/// Note that integer literals are always positive: the grammar does not contain
+/// the minus sign at all. The minus sign is just the unary negate operator,
+/// not part of the literal. Which is interesting for cases like `- 128i8`:
+/// here, the literal itself would overflow the specified type (`i8` cannot
+/// represent 128). That's why in rustc, the literal overflow check is
+/// performed as a lint after parsing, not during the lexing stage. Similarly,
+/// [`IntegerLit::parse`] does not perform an overflow check.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[non_exhaustive]
+pub struct IntegerLit<B: Buffer> {
+ base: IntegerBase,
+ main_part: B,
+ type_suffix: Option<IntegerType>,
+}
+
+/// The bases in which an integer can be specified.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum IntegerBase {
+ Binary,
+ Octal,
+ Decimal,
+ Hexadecimal,
+}
+
+/// All possible integer type suffixes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum IntegerType {
+ U8,
+ U16,
+ U32,
+ U64,
+ U128,
+ Usize,
+ I8,
+ I16,
+ I32,
+ I64,
+ I128,
+ Isize,
+}
+
+impl IntegerBase {
+ /// Returns the literal prefix that indicates this base: one of `"0b"`,
+ /// `"0o"`, `""` (decimal) or `"0x"`.
+ pub fn prefix(self) -> &'static str {
+ match self {
+ Self::Binary => "0b",
+ Self::Octal => "0o",
+ Self::Decimal => "",
+ Self::Hexadecimal => "0x",
+ }
+ }
+}
+
+impl<B: Buffer> IntegerLit<B> {
+ /// Parses the input as an integer literal. Returns an error if the input is
+ /// invalid or represents a different kind of literal.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ match first_byte_or_empty(&input)? {
+ digit @ b'0'..=b'9' => Self::parse_impl(input, digit),
+ _ => Err(perr(0, DoesNotStartWithDigit)),
+ }
+ }
+
+ /// Performs the actual string to int conversion to obtain the integer
+ /// value. The optional type suffix of the literal **is ignored by this
+ /// method**. This means `N` does not need to match the type suffix!
+ ///
+ /// Returns `None` if the literal overflows `N`.
+ pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
+ let base = match self.base {
+ IntegerBase::Binary => N::from_small_number(2),
+ IntegerBase::Octal => N::from_small_number(8),
+ IntegerBase::Decimal => N::from_small_number(10),
+ IntegerBase::Hexadecimal => N::from_small_number(16),
+ };
+
+ let mut acc = N::from_small_number(0);
+ for digit in self.main_part.bytes() {
+ if digit == b'_' {
+ continue;
+ }
+
+ // We don't actually need the base here: we already know this main
+ // part only contains digits valid for the specified base.
+ let digit = hex_digit_value(digit)
+ .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
+
+ acc = acc.checked_mul(base)?;
+ acc = acc.checked_add(N::from_small_number(digit))?;
+ }
+
+ Some(acc)
+ }
+
+ /// The base of this integer literal.
+ pub fn base(&self) -> IntegerBase {
+ self.base
+ }
+
+ /// The main part containing the digits and potentially `_`. Do not try to
+ /// parse this directly as that would ignore the base!
+ pub fn raw_main_part(&self) -> &str {
+ &self.main_part
+ }
+
+ /// The type suffix, if specified.
+ pub fn type_suffix(&self) -> Option<IntegerType> {
+ self.type_suffix
+ }
+
+ /// Precondition: first byte of string has to be in `b'0'..=b'9'`.
+ pub(crate) fn parse_impl(input: B, first: u8) -> Result<Self, ParseError> {
+ // Figure out the base and strip the base prefix, if it exists.
+ let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
+ (b'0', Some(b'b')) => (2, IntegerBase::Binary),
+ (b'0', Some(b'o')) => (2, IntegerBase::Octal),
+ (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
+
+ // Everything else is treated as decimal. Several cases are caught
+ // by this:
+ // - "123"
+ // - "0"
+ // - "0u8"
+ // - "0r" -> this will error later
+ _ => (0, IntegerBase::Decimal),
+ };
+ let without_prefix = &input[end_prefix..];
+
+ // Find end of main part.
+ let end_main = without_prefix.bytes()
+ .position(|b| !matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'))
+ .unwrap_or(without_prefix.len());
+ let (main_part, type_suffix) = without_prefix.split_at(end_main);
+
+ // Check for invalid digits and make sure there is at least one valid digit.
+ let invalid_digit_pos = match base {
+ IntegerBase::Binary => main_part.bytes()
+ .position(|b| !matches!(b, b'0' | b'1' | b'_')),
+ IntegerBase::Octal => main_part.bytes()
+ .position(|b| !matches!(b, b'0'..=b'7' | b'_')),
+ IntegerBase::Decimal => main_part.bytes()
+ .position(|b| !matches!(b, b'0'..=b'9' | b'_')),
+ IntegerBase::Hexadecimal => None,
+ };
+
+ if let Some(pos) = invalid_digit_pos {
+ return Err(perr(end_prefix + pos, InvalidDigit));
+ }
+
+ if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
+ return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
+ }
+
+
+ // Parse type suffix
+ let type_suffix = match type_suffix {
+ "" => None,
+ "u8" => Some(IntegerType::U8),
+ "u16" => Some(IntegerType::U16),
+ "u32" => Some(IntegerType::U32),
+ "u64" => Some(IntegerType::U64),
+ "u128" => Some(IntegerType::U128),
+ "usize" => Some(IntegerType::Usize),
+ "i8" => Some(IntegerType::I8),
+ "i16" => Some(IntegerType::I16),
+ "i32" => Some(IntegerType::I32),
+ "i64" => Some(IntegerType::I64),
+ "i128" => Some(IntegerType::I128),
+ "isize" => Some(IntegerType::Isize),
+ _ => return Err(perr(end_main + end_prefix..input.len(), InvalidIntegerTypeSuffix)),
+ };
+
+ Ok(Self {
+ base,
+ main_part: input.cut(end_prefix..end_main + end_prefix),
+ type_suffix,
+ })
+ }
+}
+
+impl IntegerLit<&str> {
+ /// Makes a copy of the underlying buffer and returns the owned version of
+ /// `Self`.
+ pub fn to_owned(&self) -> IntegerLit<String> {
+ IntegerLit {
+ base: self.base,
+ main_part: self.main_part.to_owned(),
+ type_suffix: self.type_suffix,
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for IntegerLit<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let suffix = match self.type_suffix {
+ None => "",
+ Some(IntegerType::U8) => "u8",
+ Some(IntegerType::U16) => "u16",
+ Some(IntegerType::U32) => "u32",
+ Some(IntegerType::U64) => "u64",
+ Some(IntegerType::U128) => "u128",
+ Some(IntegerType::Usize) => "usize",
+ Some(IntegerType::I8) => "i8",
+ Some(IntegerType::I16) => "i16",
+ Some(IntegerType::I32) => "i32",
+ Some(IntegerType::I64) => "i64",
+ Some(IntegerType::I128) => "i128",
+ Some(IntegerType::Isize) => "isize",
+ };
+ write!(f, "{}{}{}", self.base.prefix(), &*self.main_part, suffix)
+ }
+}
+
+/// Integer literal types. *Implementation detail*.
+///
+/// Implemented for all primitive integer types. This trait is sealed and cannot
+/// be implemented outside of this crate. The trait's methods are an implementation
+/// detail of this library and are not subject to semver.
+pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
+ /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
+ #[doc(hidden)]
+ fn from_small_number(n: u8) -> Self;
+
+ #[doc(hidden)]
+ fn checked_add(self, rhs: Self) -> Option<Self>;
+
+ #[doc(hidden)]
+ fn checked_mul(self, rhs: Self) -> Option<Self>;
+
+ #[doc(hidden)]
+ fn ty() -> IntegerType;
+}
+
+macro_rules! impl_from_int_literal {
+ ($( $ty:ty => $variant:ident ,)* ) => {
+ $(
+ impl self::sealed::Sealed for $ty {}
+ impl FromIntegerLiteral for $ty {
+ fn from_small_number(n: u8) -> Self {
+ n as Self
+ }
+ fn checked_add(self, rhs: Self) -> Option<Self> {
+ self.checked_add(rhs)
+ }
+ fn checked_mul(self, rhs: Self) -> Option<Self> {
+ self.checked_mul(rhs)
+ }
+ fn ty() -> IntegerType {
+ IntegerType::$variant
+ }
+ }
+ )*
+ };
+}
+
+impl_from_int_literal!(
+ u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
+ i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
+);
+
+mod sealed {
+ pub trait Sealed {}
+}
+
+
+#[cfg(test)]
+mod tests;
diff --git a/src/integer/tests.rs b/src/integer/tests.rs
new file mode 100644
index 0000000..dc8c861
--- /dev/null
+++ b/src/integer/tests.rs
@@ -0,0 +1,336 @@
+use std::fmt::{Debug, Display};
+use crate::{
+ FromIntegerLiteral, Literal, IntegerLit, IntegerType as Ty, IntegerBase, IntegerBase::*,
+ test_util::assert_parse_ok_eq,
+};
+
+
+// ===== Utility functions =======================================================================
+
+#[track_caller]
+fn check<T: FromIntegerLiteral + PartialEq + Debug + Display>(
+ input: &str,
+ value: T,
+ base: IntegerBase,
+ main_part: &str,
+ type_suffix: Option<Ty>,
+) {
+ let expected_integer = IntegerLit { base, main_part, type_suffix };
+ assert_parse_ok_eq(
+ input, IntegerLit::parse(input), expected_integer.clone(), "IntegerLit::parse");
+ assert_parse_ok_eq(
+ input, Literal::parse(input), Literal::Integer(expected_integer), "Literal::parse");
+
+ let actual_value = IntegerLit::parse(input)
+ .unwrap()
+ .value::<T>()
+ .unwrap_or_else(|| panic!("unexpected overflow in `IntegerLit::value` for `{}`", input));
+ if actual_value != value {
+ panic!(
+ "Parsing int literal `{}` should give value `{}`, but actually resulted in `{}`",
+ input,
+ value,
+ actual_value,
+ );
+ }
+}
+
+
+// ===== Actual tests ===========================================================================
+
+#[test]
+fn parse_decimal() {
+ check("0", 0u128, Decimal, "0", None);
+ check("1", 1u8, Decimal, "1", None);
+ check("8", 8u16, Decimal, "8", None);
+ check("9", 9u32, Decimal, "9", None);
+ check("10", 10u64, Decimal, "10", None);
+ check("11", 11i8, Decimal, "11", None);
+ check("123456789", 123456789i128, Decimal, "123456789", None);
+
+ check("05", 5i16, Decimal, "05", None);
+ check("00005", 5i32, Decimal, "00005", None);
+ check("0123456789", 123456789i64, Decimal, "0123456789", None);
+
+ check("123_456_789", 123_456_789, Decimal, "123_456_789", None);
+ check("0___4", 4, Decimal, "0___4", None);
+ check("0___4_3", 43, Decimal, "0___4_3", None);
+ check("0___4_3", 43, Decimal, "0___4_3", None);
+ check("123___________", 123, Decimal, "123___________", None);
+
+ check(
+ "340282366920938463463374607431768211455",
+ 340282366920938463463374607431768211455u128,
+ Decimal,
+ "340282366920938463463374607431768211455",
+ None,
+ );
+ check(
+ "340_282_366_920_938_463_463_374_607_431_768_211_455",
+ 340282366920938463463374607431768211455u128,
+ Decimal,
+ "340_282_366_920_938_463_463_374_607_431_768_211_455",
+ None,
+ );
+ check(
+ "3_40_282_3669_20938_463463_3746074_31768211_455___",
+ 340282366920938463463374607431768211455u128,
+ Decimal,
+ "3_40_282_3669_20938_463463_3746074_31768211_455___",
+ None,
+ );
+}
+
+#[test]
+fn parse_binary() {
+ check("0b0", 0b0, Binary, "0", None);
+ check("0b000", 0b000, Binary, "000", None);
+ check("0b1", 0b1, Binary, "1", None);
+ check("0b01", 0b01, Binary, "01", None);
+ check("0b101010", 0b101010, Binary, "101010", None);
+ check("0b10_10_10", 0b10_10_10, Binary, "10_10_10", None);
+ check("0b01101110____", 0b01101110____, Binary, "01101110____", None);
+
+ check("0b10010u8", 0b10010u8, Binary, "10010", Some(Ty::U8));
+ check("0b10010i8", 0b10010u8, Binary, "10010", Some(Ty::I8));
+ check("0b10010u64", 0b10010u64, Binary, "10010", Some(Ty::U64));
+ check("0b10010i64", 0b10010u64, Binary, "10010", Some(Ty::I64));
+ check(
+ "0b1011001_00110000_00101000_10100101u32",
+ 0b1011001_00110000_00101000_10100101u32,
+ Binary,
+ "1011001_00110000_00101000_10100101",
+ Some(Ty::U32),
+ );
+}
+
+#[test]
+fn parse_octal() {
+ check("0o0", 0o0, Octal, "0", None);
+ check("0o1", 0o1, Octal, "1", None);
+ check("0o6", 0o6, Octal, "6", None);
+ check("0o7", 0o7, Octal, "7", None);
+ check("0o17", 0o17, Octal, "17", None);
+ check("0o123", 0o123, Octal, "123", None);
+ check("0o7654321", 0o7654321, Octal, "7654321", None);
+ check("0o7_53_1", 0o7_53_1, Octal, "7_53_1", None);
+ check("0o66_", 0o66_, Octal, "66_", None);
+
+ check("0o755u16", 0o755u16, Octal, "755", Some(Ty::U16));
+ check("0o755i128", 0o755i128, Octal, "755", Some(Ty::I128));
+}
+
+#[test]
+fn parse_hexadecimal() {
+ check("0x0", 0x0, Hexadecimal, "0", None);
+ check("0x1", 0x1, Hexadecimal, "1", None);
+ check("0x9", 0x9, Hexadecimal, "9", None);
+
+ check("0xa", 0xa, Hexadecimal, "a", None);
+ check("0xf", 0xf, Hexadecimal, "f", None);
+ check("0x17", 0x17, Hexadecimal, "17", None);
+ check("0x1b", 0x1b, Hexadecimal, "1b", None);
+ check("0x123", 0x123, Hexadecimal, "123", None);
+ check("0xace", 0xace, Hexadecimal, "ace", None);
+ check("0xfdb971", 0xfdb971, Hexadecimal, "fdb971", None);
+ check("0xa_54_f", 0xa_54_f, Hexadecimal, "a_54_f", None);
+ check("0x6d_", 0x6d_, Hexadecimal, "6d_", None);
+
+ check("0xA", 0xA, Hexadecimal, "A", None);
+ check("0xF", 0xF, Hexadecimal, "F", None);
+ check("0x17", 0x17, Hexadecimal, "17", None);
+ check("0x1B", 0x1B, Hexadecimal, "1B", None);
+ check("0x123", 0x123, Hexadecimal, "123", None);
+ check("0xACE", 0xACE, Hexadecimal, "ACE", None);
+ check("0xFDB971", 0xFDB971, Hexadecimal, "FDB971", None);
+ check("0xA_54_F", 0xA_54_F, Hexadecimal, "A_54_F", None);
+ check("0x6D_", 0x6D_, Hexadecimal, "6D_", None);
+
+ check("0xFdB97a1", 0xFdB97a1, Hexadecimal, "FdB97a1", None);
+ check("0xfdB97A1", 0xfdB97A1, Hexadecimal, "fdB97A1", None);
+
+ check("0x40u16", 0x40u16, Hexadecimal, "40", Some(Ty::U16));
+ check("0xffi128", 0xffi128, Hexadecimal, "ff", Some(Ty::I128));
+}
+
+#[test]
+fn starting_underscore() {
+ check("0b_1", 1, Binary, "_1", None);
+ check("0b_010i16", 0b_010, Binary, "_010", Some(Ty::I16));
+
+ check("0o_5", 5, Octal, "_5", None);
+ check("0o_750u128", 0o_750u128, Octal, "_750", Some(Ty::U128));
+
+ check("0x_c", 0xc, Hexadecimal, "_c", None);
+ check("0x_cf3i8", 0x_cf3, Hexadecimal, "_cf3", Some(Ty::I8));
+}
+
+#[test]
+fn parse_overflowing_just_fine() {
+ check("256u8", 256u16, Decimal, "256", Some(Ty::U8));
+ check("123_456_789u8", 123_456_789u32, Decimal, "123_456_789", Some(Ty::U8));
+ check("123_456_789u16", 123_456_789u32, Decimal, "123_456_789", Some(Ty::U16));
+
+ check("123_123_456_789u8", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U8));
+ check("123_123_456_789u16", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U16));
+ check("123_123_456_789u32", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U32));
+}
+
+#[test]
+fn suffixes() {
+ [
+ ("123i8", Ty::I8),
+ ("123i16", Ty::I16),
+ ("123i32", Ty::I32),
+ ("123i64", Ty::I64),
+ ("123i128", Ty::I128),
+ ("123u8", Ty::U8),
+ ("123u16", Ty::U16),
+ ("123u32", Ty::U32),
+ ("123u64", Ty::U64),
+ ("123u128", Ty::U128),
+ ].iter().for_each(|&(s, ty)| {
+ assert_eq!(IntegerLit::parse(s).unwrap().type_suffix(), Some(ty));
+ });
+}
+
+#[test]
+fn overflow_u128() {
+ let inputs = [
+ "340282366920938463463374607431768211456",
+ "0x100000000000000000000000000000000",
+ "0o4000000000000000000000000000000000000000000",
+ "0b1000000000000000000000000000000000000000000000000000000000000000000\
+ 00000000000000000000000000000000000000000000000000000000000000",
+ "340282366920938463463374607431768211456u128",
+ "340282366920938463463374607431768211457",
+ "3_40_282_3669_20938_463463_3746074_31768211_456___",
+ "3_40_282_3669_20938_463463_3746074_31768211_455___1",
+ "3_40_282_3669_20938_463463_3746074_31768211_455___0u128",
+ "3402823669209384634633746074317682114570",
+ ];
+
+ for &input in &inputs {
+ let lit = IntegerLit::parse(input).expect("failed to parse");
+ assert!(lit.value::<u128>().is_none());
+ }
+}
+
+#[test]
+fn overflow_u8() {
+ let inputs = [
+ "256", "0x100", "0o400", "0b100000000",
+ "257", "0x101", "0o401", "0b100000001",
+ "300",
+ "1548",
+ "2548985",
+ "256u128",
+ "256u8",
+ "2_5_6",
+ "256_____1",
+ "256__",
+ ];
+
+ for &input in &inputs {
+ let lit = IntegerLit::parse(input).expect("failed to parse");
+ assert!(lit.value::<u8>().is_none());
+ }
+}
+
+#[test]
+fn parse_err() {
+ assert_err!(IntegerLit, "", Empty, None);
+ assert_err_single!(IntegerLit::parse("a"), DoesNotStartWithDigit, 0);
+ assert_err_single!(IntegerLit::parse(";"), DoesNotStartWithDigit, 0);
+ assert_err_single!(IntegerLit::parse("0;"), InvalidIntegerTypeSuffix, 1..2);
+ assert_err_single!(IntegerLit::parse("0a"), InvalidDigit, 1);
+ assert_err!(IntegerLit, "0b", NoDigits, 2..2);
+ assert_err_single!(IntegerLit::parse("0z"), InvalidIntegerTypeSuffix, 1..2);
+ assert_err_single!(IntegerLit::parse(" 0"), DoesNotStartWithDigit, 0);
+ assert_err_single!(IntegerLit::parse("0 "), InvalidIntegerTypeSuffix, 1);
+ assert_err_single!(IntegerLit::parse("0a3"), InvalidDigit, 1);
+ assert_err!(IntegerLit, "0b3", InvalidDigit, 2);
+ assert_err_single!(IntegerLit::parse("0z3"), InvalidIntegerTypeSuffix, 1..3);
+ assert_err_single!(IntegerLit::parse("_"), DoesNotStartWithDigit, 0);
+ assert_err_single!(IntegerLit::parse("_3"), DoesNotStartWithDigit, 0);
+}
+
+#[test]
+fn invalid_digits() {
+ assert_err!(IntegerLit, "0b10201", InvalidDigit, 4);
+ assert_err!(IntegerLit, "0b9", InvalidDigit, 2);
+ assert_err!(IntegerLit, "0b07", InvalidDigit, 3);
+ assert_err!(IntegerLit, "0b0a", InvalidDigit, 3);
+ assert_err!(IntegerLit, "0b0A", InvalidDigit, 3);
+ assert_err!(IntegerLit, "0b01f", InvalidDigit, 4);
+ assert_err!(IntegerLit, "0b01F", InvalidDigit, 4);
+
+ assert_err!(IntegerLit, "0o12380", InvalidDigit, 5);
+ assert_err!(IntegerLit, "0o192", InvalidDigit, 3);
+ assert_err!(IntegerLit, "0o7a_", InvalidDigit, 3);
+ assert_err!(IntegerLit, "0o7A_", InvalidDigit, 3);
+ assert_err!(IntegerLit, "0o72f_0", InvalidDigit, 4);
+ assert_err!(IntegerLit, "0o72F_0", InvalidDigit, 4);
+
+ assert_err_single!(IntegerLit::parse("12a3"), InvalidDigit, 2);
+ assert_err_single!(IntegerLit::parse("12f3"), InvalidDigit, 2);
+ assert_err_single!(IntegerLit::parse("12f_"), InvalidDigit, 2);
+ assert_err_single!(IntegerLit::parse("12F_"), InvalidDigit, 2);
+ assert_err_single!(IntegerLit::parse("a_123"), DoesNotStartWithDigit, 0);
+ assert_err_single!(IntegerLit::parse("B_123"), DoesNotStartWithDigit, 0);
+
+ assert_err!(IntegerLit, "0x8cg", InvalidIntegerTypeSuffix, 4..5);
+ assert_err!(IntegerLit, "0x8cG", InvalidIntegerTypeSuffix, 4..5);
+ assert_err!(IntegerLit, "0x8c1h_", InvalidIntegerTypeSuffix, 5..7);
+ assert_err!(IntegerLit, "0x8c1H_", InvalidIntegerTypeSuffix, 5..7);
+ assert_err!(IntegerLit, "0x8czu16", InvalidIntegerTypeSuffix, 4..8);
+}
+
+#[test]
+fn no_valid_digits() {
+ assert_err!(IntegerLit, "0x_", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0x__", NoDigits, 2..4);
+ assert_err!(IntegerLit, "0x________", NoDigits, 2..10);
+ assert_err!(IntegerLit, "0x_i8", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0x_u8", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0x_isize", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0x_usize", NoDigits, 2..3);
+
+ assert_err!(IntegerLit, "0o_", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0o__", NoDigits, 2..4);
+ assert_err!(IntegerLit, "0o________", NoDigits, 2..10);
+ assert_err!(IntegerLit, "0o_i32", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0o_u32", NoDigits, 2..3);
+
+ assert_err!(IntegerLit, "0b_", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0b__", NoDigits, 2..4);
+ assert_err!(IntegerLit, "0b________", NoDigits, 2..10);
+ assert_err!(IntegerLit, "0b_i128", NoDigits, 2..3);
+ assert_err!(IntegerLit, "0b_u128", NoDigits, 2..3);
+}
+
+#[test]
+fn invalid_suffix() {
+ assert_err!(IntegerLit, "5u7", InvalidIntegerTypeSuffix, 1..3);
+ assert_err!(IntegerLit, "5u9", InvalidIntegerTypeSuffix, 1..3);
+ assert_err!(IntegerLit, "5u0", InvalidIntegerTypeSuffix, 1..3);
+ assert_err!(IntegerLit, "33u12", InvalidIntegerTypeSuffix, 2..5);
+ assert_err!(IntegerLit, "84u17", InvalidIntegerTypeSuffix, 2..5);
+ assert_err!(IntegerLit, "99u80", InvalidIntegerTypeSuffix, 2..5);
+ assert_err!(IntegerLit, "1234uu16", InvalidIntegerTypeSuffix, 4..8);
+
+ assert_err!(IntegerLit, "5i7", InvalidIntegerTypeSuffix, 1..3);
+ assert_err!(IntegerLit, "5i9", InvalidIntegerTypeSuffix, 1..3);
+ assert_err!(IntegerLit, "5i0", InvalidIntegerTypeSuffix, 1..3);
+ assert_err!(IntegerLit, "33i12", InvalidIntegerTypeSuffix, 2..5);
+ assert_err!(IntegerLit, "84i17", InvalidIntegerTypeSuffix, 2..5);
+ assert_err!(IntegerLit, "99i80", InvalidIntegerTypeSuffix, 2..5);
+ assert_err!(IntegerLit, "1234ii16", InvalidIntegerTypeSuffix, 4..8);
+
+ assert_err!(IntegerLit, "0ui32", InvalidIntegerTypeSuffix, 1..5);
+ assert_err!(IntegerLit, "1iu32", InvalidIntegerTypeSuffix, 1..5);
+ assert_err_single!(IntegerLit::parse("54321a64"), InvalidDigit, 5);
+ assert_err!(IntegerLit, "54321b64", InvalidDigit, 5);
+ assert_err!(IntegerLit, "54321x64", InvalidIntegerTypeSuffix, 5..8);
+ assert_err!(IntegerLit, "54321o64", InvalidIntegerTypeSuffix, 5..8);
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..bd81f56
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,288 @@
+//! Parsing and inspecting Rust literal tokens.
+//!
+//! This library offers functionality to parse Rust literals, i.e. tokens in the
+//! Rust programming language that represent fixed values. The grammar for
+//! those is defined [here][ref].
+//!
+//! This kind of functionality already exists in the crate `syn`. However, as
+//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
+//! built. This crate also offers a bit more flexibility compared to `syn`
+//! (only regarding literals, of course).
+//!
+//! ---
+//!
+//! The main types of this library are [`Literal`], representing any kind of
+//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
+//! specific kind of literal.
+//!
+//! There are different ways to obtain such a literal type:
+//!
+//! - **`parse`**: parses a `&str` or `String` and returns `Result<_,
+//! ParseError>`. For example: [`Literal::parse`] and
+//! [`IntegerLit::parse`].
+//!
+//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from
+//! the `proc_macro` crate into a `Literal` from this crate.
+//!
+//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a
+//! `proc_macro::Literal` into a specific literal type of this crate. If
+//! the input is a literal of a different kind, `Err(InvalidToken)` is
+//! returned.
+//!
+//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a
+//! literal type of this crate. An error is returned if the token tree is
+//! not a literal, or if you are trying to turn it into a specific kind of
+//! literal and the token tree is a different kind of literal.
+//!
+//! All of the `From` and `TryFrom` conversions also work for references to
+//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
+//! enabled (which it is by default), all these `From` and `TryFrom` impls also
+//! exist for the corresponding `proc_macro2` types.
+//!
+//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.
+//! The `TryFrom<TokenTree>` impls check for those two special idents and
+//! return a `BoolLit` appropriately. For that reason, there is also no
+//! `TryFrom<proc_macro::Literal>` impl for `BoolLit`. The `proc_macro::Literal`
+//! simply cannot represent bool literals.
+//!
+//!
+//! # Examples
+//!
+//! In a proc-macro:
+//!
+//! ```ignore
+//! use std::convert::TryFrom;
+//! use proc_macro::TokenStream;
+//! use litrs::FloatLit;
+//!
+//! #[proc_macro]
+//! pub fn foo(input: TokenStream) -> TokenStream {
+//! let mut input = input.into_iter().collect::<Vec<_>>();
+//! if input.len() != 1 {
+//! // Please do proper error handling in your real code!
+//! panic!("expected exactly one token as input");
+//! }
+//! let token = input.remove(0);
+//!
+//! match FloatLit::try_from(token) {
+//! Ok(float_lit) => { /* do something */ }
+//! Err(e) => return e.to_compile_error(),
+//! }
+//!
+//! // Dummy output
+//! TokenStream::new()
+//! }
+//! ```
+//!
+//! Parsing from string:
+//!
+//! ```
+//! use litrs::{FloatLit, Literal};
+//!
+//! // Parse a specific kind of literal (float in this case):
+//! let float_lit = FloatLit::parse("3.14f32");
+//! assert!(float_lit.is_ok());
+//! assert_eq!(float_lit.unwrap().type_suffix(), Some(litrs::FloatType::F32));
+//! assert!(FloatLit::parse("'c'").is_err());
+//!
+//! // Parse any kind of literal. After parsing, you can inspect the literal
+//! // and decide what to do in each case.
+//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
+//! match lit {
+//! Literal::Integer(lit) => { /* ... */ }
+//! Literal::Float(lit) => { /* ... */ }
+//! Literal::Bool(lit) => { /* ... */ }
+//! Literal::Char(lit) => { /* ... */ }
+//! Literal::String(lit) => { /* ... */ }
+//! Literal::Byte(lit) => { /* ... */ }
+//! Literal::ByteString(lit) => { /* ... */ }
+//! }
+//! ```
+//!
+//!
+//!
+//! # Crate features
+//!
+//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of
+//! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
+//!
+//!
+//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
+//!
+
+#![deny(missing_debug_implementations)]
+
+extern crate proc_macro;
+
+#[cfg(test)]
+#[macro_use]
+mod test_util;
+
+#[cfg(test)]
+mod tests;
+
+mod bool;
+mod byte;
+mod bytestr;
+mod char;
+mod err;
+mod escape;
+mod float;
+mod impls;
+mod integer;
+mod parse;
+mod string;
+
+
+use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};
+
+pub use self::{
+ bool::BoolLit,
+ byte::ByteLit,
+ bytestr::ByteStringLit,
+ char::CharLit,
+ err::{InvalidToken, ParseError},
+ float::{FloatLit, FloatType},
+ integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},
+ string::StringLit,
+};
+
+
+// ==============================================================================================
+// ===== `Literal` and type defs
+// ==============================================================================================
+
+/// A literal which owns the underlying buffer.
+pub type OwnedLiteral = Literal<String>;
+
+/// A literal whose underlying buffer is borrowed.
+pub type SharedLiteral<'a> = Literal<&'a str>;
+
+/// A literal. This is the main type of this library.
+///
+/// This type is generic over the underlying buffer `B`, which can be `&str` or
+/// `String`. There are two useful type aliases: [`OwnedLiteral`] and
+/// [`SharedLiteral`].
+///
+/// To create this type, you have to either call [`Literal::parse`] with an
+/// input string or use the `From<_>` impls of this type. The impls are only
+/// available if the corresponding crate features are enabled (they are enabled
+/// by default).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Literal<B: Buffer> {
+ Bool(BoolLit),
+ Integer(IntegerLit<B>),
+ Float(FloatLit<B>),
+ Char(CharLit<B>),
+ String(StringLit<B>),
+ Byte(ByteLit<B>),
+ ByteString(ByteStringLit<B>),
+}
+
+impl Literal<&str> {
+ /// Makes a copy of the underlying buffer and returns the owned version of
+ /// `Self`.
+ pub fn into_owned(self) -> OwnedLiteral {
+ match self {
+ Literal::Bool(l) => Literal::Bool(l.to_owned()),
+ Literal::Integer(l) => Literal::Integer(l.to_owned()),
+ Literal::Float(l) => Literal::Float(l.to_owned()),
+ Literal::Char(l) => Literal::Char(l.to_owned()),
+ Literal::String(l) => Literal::String(l.into_owned()),
+ Literal::Byte(l) => Literal::Byte(l.to_owned()),
+ Literal::ByteString(l) => Literal::ByteString(l.into_owned()),
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for Literal<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ Literal::Bool(l) => l.fmt(f),
+ Literal::Integer(l) => l.fmt(f),
+ Literal::Float(l) => l.fmt(f),
+ Literal::Char(l) => l.fmt(f),
+ Literal::String(l) => l.fmt(f),
+ Literal::Byte(l) => l.fmt(f),
+ Literal::ByteString(l) => l.fmt(f),
+ }
+ }
+}
+
+
+// ==============================================================================================
+// ===== Buffer
+// ==============================================================================================
+
+/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.
+///
+/// This trait is an implementation detail of this library, cannot be
+/// implemented in other crates and is not subject to semantic versioning.
+/// `litrs` only guarantees that this trait is implemented for `String` and
+/// `for<'a> &'a str`.
+pub trait Buffer: sealed::Sealed + Deref<Target = str> {
+ /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
+ type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;
+
+ #[doc(hidden)]
+ fn into_cow(self) -> Self::Cow;
+
+ /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
+ type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;
+
+ #[doc(hidden)]
+ fn into_byte_cow(self) -> Self::ByteCow;
+
+ /// Cuts away some characters at the beginning and some at the end, keeping
+ /// only the given byte range. The given range has to be in bounds.
+ #[doc(hidden)]
+ fn cut(self, range: Range<usize>) -> Self;
+}
+
+mod sealed {
+ pub trait Sealed {}
+}
+
+impl<'a> sealed::Sealed for &'a str {}
+impl<'a> Buffer for &'a str {
+ #[doc(hidden)]
+ fn cut(self, range: Range<usize>) -> Self {
+ &self[range]
+ }
+
+ type Cow = Cow<'a, str>;
+ #[doc(hidden)]
+ fn into_cow(self) -> Self::Cow {
+ self.into()
+ }
+ type ByteCow = Cow<'a, [u8]>;
+ #[doc(hidden)]
+ fn into_byte_cow(self) -> Self::ByteCow {
+ self.as_bytes().into()
+ }
+}
+
+impl sealed::Sealed for String {}
+impl Buffer for String {
+ #[doc(hidden)]
+ fn cut(mut self, range: Range<usize>) -> Self {
+ // This is not the most efficient way, but it works. First we cut the
+ // end, then the beginning. Note that `drain` also removes the range if
+ // the iterator is not consumed.
+ self.truncate(range.end);
+ self.drain(..range.start);
+ self
+ }
+
+ type Cow = Cow<'static, str>;
+ #[doc(hidden)]
+ fn into_cow(self) -> Self::Cow {
+ self.into()
+ }
+
+ type ByteCow = Cow<'static, [u8]>;
+ #[doc(hidden)]
+ fn into_byte_cow(self) -> Self::ByteCow {
+ self.into_bytes().into()
+ }
+}
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..07026ed
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,81 @@
+use crate::{
+ BoolLit,
+ Buffer,
+ ByteLit,
+ ByteStringLit,
+ CharLit,
+ ParseError,
+ FloatLit,
+ IntegerLit,
+ Literal,
+ StringLit,
+ err::{perr, ParseErrorKind::*},
+};
+
+
+impl<B: Buffer> Literal<B> {
+ /// Parses the given input as a Rust literal.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ let first = first_byte_or_empty(&input)?;
+ let second = input.as_bytes().get(1).copied();
+
+ match first {
+ b'f' if &*input == "false" => Ok(Self::Bool(BoolLit::False)),
+ b't' if &*input == "true" => Ok(Self::Bool(BoolLit::True)),
+
+ // A number literal (integer or float).
+ digit @ b'0'..=b'9' => {
+ // To figure out whether this is a float or integer, we do some
+ // quick inspection here. Yes, this is technically duplicate
+ // work with what is happening in the integer/float parse
+ // methods, but it makes the code way easier for now and won't
+ // be a huge performance loss.
+ let end = 1 + end_dec_digits(&input[1..]);
+ match input.as_bytes().get(end) {
+ // Potential chars in integer literals: b, o, x for base; u
+ // and i for type suffix.
+ None | Some(b'b') | Some(b'o') | Some(b'x') | Some(b'u') | Some(b'i')
+ => IntegerLit::parse_impl(input, digit).map(Literal::Integer),
+
+ // Potential chars for float literals: `.` as fractional
+ // period, e and E as exponent start and f as type suffix.
+ Some(b'.') | Some(b'e') | Some(b'E') | Some(b'f')
+ => FloatLit::parse_impl(input).map(Literal::Float),
+
+ _ => Err(perr(end, UnexpectedChar)),
+ }
+ },
+
+ b'\'' => CharLit::parse_impl(input).map(Literal::Char),
+ b'"' | b'r' => StringLit::parse_impl(input).map(Literal::String),
+
+ b'b' if second == Some(b'\'') => ByteLit::parse_impl(input).map(Literal::Byte),
+ b'b' if second == Some(b'r') || second == Some(b'"')
+ => ByteStringLit::parse_impl(input).map(Literal::ByteString),
+
+ _ => Err(perr(None, InvalidLiteral)),
+ }
+ }
+}
+
+
+pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
+ s.as_bytes().get(0).copied().ok_or(perr(None, Empty))
+}
+
+/// Returns the index of the first byte in `input` that is neither `_` nor a
+/// decimal digit, or `input.len()` if there is no such byte.
+pub(crate) fn end_dec_digits(input: &str) -> usize {
+ input.bytes()
+ .position(|b| !matches!(b, b'_' | b'0'..=b'9'))
+ .unwrap_or(input.len())
+}
+
+pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
+ match digit {
+ b'0'..=b'9' => Some(digit - b'0'),
+ b'a'..=b'f' => Some(digit - b'a' + 10),
+ b'A'..=b'F' => Some(digit - b'A' + 10),
+ _ => None,
+ }
+}
diff --git a/src/string/mod.rs b/src/string/mod.rs
new file mode 100644
index 0000000..a21f7a7
--- /dev/null
+++ b/src/string/mod.rs
@@ -0,0 +1,110 @@
+use std::{fmt, ops::Range};
+
+use crate::{
+ Buffer, ParseError,
+ err::{perr, ParseErrorKind::*},
+ escape::{scan_raw_string, unescape_string},
+ parse::first_byte_or_empty,
+};
+
+
+/// A string or raw string literal, e.g. `"foo"`, `"Grüße"` or `r#"a🦊c"d🦀f"#`.
+///
+/// See [the reference][ref] for more information.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#string-literals
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StringLit<B: Buffer> {
+ /// The raw input.
+ raw: B,
+
+ /// The string value (with all escapes unescaped), or `None` if there were
+ /// no escapes. In the latter case, the inner part of `raw` is the string value.
+ value: Option<String>,
+
+ /// The number of hash signs in case of a raw string literal, or `None` if
+ /// it's not a raw string literal.
+ num_hashes: Option<u32>,
+}
+
+impl<B: Buffer> StringLit<B> {
+ /// Parses the input as a (raw) string literal. Returns an error if the
+ /// input is invalid or represents a different kind of literal.
+ pub fn parse(input: B) -> Result<Self, ParseError> {
+ match first_byte_or_empty(&input)? { // empty input is reported as an error right here
+ b'r' | b'"' => Self::parse_impl(input),
+ _ => Err(perr(0, InvalidStringLiteralStart)), // error points at the first byte
+ }
+ }
+
+ /// Returns the string value this literal represents (where all escapes have
+ /// been turned into their respective values).
+ pub fn value(&self) -> &str {
+ self.value.as_deref().unwrap_or(&self.raw[self.inner_range()]) // no stored value: borrow the text between the quotes
+ }
+
+ /// Like `value` but returns a potentially owned version of the value.
+ ///
+ /// The return value is either `Cow<'static, str>` if `B = String`, or
+ /// `Cow<'a, str>` if `B = &'a str`.
+ pub fn into_value(self) -> B::Cow {
+ let inner_range = self.inner_range(); // computed before `self` is destructured below
+ let Self { raw, value, .. } = self;
+ value.map(B::Cow::from).unwrap_or_else(|| raw.cut(inner_range).into_cow())
+ }
+
+ /// Returns whether this literal is a raw string literal (starting with
+ /// `r`).
+ pub fn is_raw_string(&self) -> bool {
+ self.num_hashes.is_some()
+ }
+
+ /// The range within `self.raw` that excludes the quotes and potential `r#`.
+ fn inner_range(&self) -> Range<usize> {
+ match self.num_hashes {
+ None => 1..self.raw.len() - 1, // drop the opening and the closing `"`
+ Some(n) => 1 + n as usize + 1..self.raw.len() - n as usize - 1, // drop `r`, `n` hashes, `"` in front; `"`, `n` hashes at the end
+ }
+ }
+
+ /// Precondition: input has to start with either `"` or `r`.
+ pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
+ if input.starts_with('r') {
+ let (value, num_hashes) = scan_raw_string::<char>(&input, 1)?; // NOTE(review): `1` is presumably the offset past the leading `r` — confirm in `escape`
+ Ok(Self {
+ raw: input,
+ value,
+ num_hashes: Some(num_hashes),
+ })
+ } else {
+ let value = unescape_string::<char>(&input, 1)?; // NOTE(review): `1` is presumably the offset past the opening `"` — confirm in `escape`
+ Ok(Self {
+ raw: input,
+ value,
+ num_hashes: None, // `None` marks a non-raw string literal
+ })
+ }
+ }
+}
+
+impl StringLit<&str> {
+ /// Makes a copy of the underlying buffer and returns the owned version of
+ /// `Self`.
+ pub fn into_owned(self) -> StringLit<String> {
+ StringLit {
+ raw: self.raw.to_owned(), // the only borrowed field; copied into a `String`
+ value: self.value,
+ num_hashes: self.num_hashes,
+ }
+ }
+}
+
+impl<B: Buffer> fmt::Display for StringLit<B> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.pad(&self.raw) // writes the raw input unchanged, applying the formatter's padding flags
+ }
+}
+
+
+#[cfg(test)]
+mod tests;
diff --git a/src/string/tests.rs b/src/string/tests.rs
new file mode 100644
index 0000000..8d8882e
--- /dev/null
+++ b/src/string/tests.rs
@@ -0,0 +1,263 @@
+use crate::{Literal, StringLit, test_util::assert_parse_ok_eq};
+
+// ===== Utility functions =======================================================================
+
+macro_rules! check { // Parses the source text of `$lit` and compares the result with the value of `$lit` itself.
+ ($lit:literal, $has_escapes:expr, $num_hashes:expr) => {
+ let input = stringify!($lit); // the literal as written in the source, quotes included
+ let expected = StringLit {
+ raw: input,
+ value: if $has_escapes { Some($lit.to_string()) } else { None },
+ num_hashes: $num_hashes,
+ };
+
+ assert_parse_ok_eq(input, StringLit::parse(input), expected.clone(), "StringLit::parse");
+ assert_parse_ok_eq(
+ input, Literal::parse(input), Literal::String(expected), "Literal::parse");
+ assert_eq!(StringLit::parse(input).unwrap().value(), $lit);
+ assert_eq!(StringLit::parse(input).unwrap().into_value(), $lit);
+ };
+}
+
+
+// ===== Actual tests ============================================================================
+
+#[test]
+fn simple() { // Non-raw strings without any escapes, in several scripts; no owned value is expected.
+ check!("", false, None);
+ check!("a", false, None);
+ check!("peter", false, None);
+ check!("Sei gegrüßt, Bärthelt!", false, None);
+ check!("أنا لا أتحدث العربية", false, None);
+ check!("お前はもう死んでいる", false, None);
+ check!("Пушки - интересные музыкальные инструменты", false, None);
+ check!("lit 👌 😂 af", false, None);
+}
+
+#[test]
+fn special_whitespace() { // Literal newline/tab characters inside (raw) strings are not escapes, so `value` stays `None`.
+ let strings = ["\n", "\t", "foo\tbar", "🦊\n"];
+
+ for &s in &strings {
+ let input = format!(r#""{}""#, s);
+ let input_raw = format!(r#"r"{}""#, s);
+ for (input, num_hashes) in vec![(input, None), (input_raw, Some(0))] { // same content as normal and as raw string
+ let expected = StringLit {
+ raw: &*input,
+ value: None,
+ num_hashes,
+ };
+ assert_parse_ok_eq(
+ &input, StringLit::parse(&*input), expected.clone(), "StringLit::parse");
+ assert_parse_ok_eq(
+ &input, Literal::parse(&*input), Literal::String(expected), "Literal::parse");
+ assert_eq!(StringLit::parse(&*input).unwrap().value(), s);
+ assert_eq!(StringLit::parse(&*input).unwrap().into_value(), s);
+ }
+ }
+}
+
+#[test]
+fn simple_escapes() { // Backslash escapes (quote, ASCII and `\xNN` escapes); an owned, unescaped value is expected.
+ check!("a\nb", true, None);
+ check!("\nb", true, None);
+ check!("a\n", true, None);
+ check!("\n", true, None);
+
+ check!("\x60犬 \t 猫\r馬\n うさぎ \0ネズミ", true, None);
+ check!("నా \\పిల్లి లావుగా ఉంది", true, None);
+ check!("నా \\పిల్లి లావుగా 🐈\"ఉంది", true, None);
+ check!("\\నా\\ పిల్లి లావుగా\" ఉంది\"", true, None);
+ check!("\"నా \\🐈 పిల్లి లావుగా \" ఉంది\\", true, None);
+
+ check!("\x00", true, None);
+ check!(" \x01", true, None);
+ check!("\x0c 🦊", true, None);
+ check!(" 🦊\x0D ", true, None);
+ check!("\\x13", true, None);
+ check!("\"x30", true, None);
+}
+
+#[test]
+fn unicode_escapes() { // `\u{...}` escapes in mixed case, with leading zeros and with underscores between digits.
+ check!("\u{0}", true, None);
+ check!(" \u{00}", true, None);
+ check!("\u{b} ", true, None);
+ check!(" \u{B} ", true, None);
+ check!("\u{7e}", true, None);
+ check!("నక్క\u{E4}", true, None);
+ check!("\u{e4} నక్క", true, None);
+ check!(" \u{fc}నక్క ", true, None);
+ check!("\u{Fc}", true, None);
+ check!("\u{fC}🦊\nлиса", true, None);
+ check!("лиса\u{FC}", true, None);
+ check!("лиса\u{b10}నక్క🦊", true, None);
+ check!("\"నక్క\u{B10}", true, None);
+ check!("лиса\\\u{0b10}", true, None);
+ check!("ли🦊са\\\"\u{0b10}", true, None);
+ check!("నక్క\\\\u{0b10}", true, None);
+ check!("\u{2764}Füchsin", true, None);
+ check!("Füchse \u{1f602}", true, None);
+ check!("cd\u{1F602}ab", true, None);
+
+ check!("\u{0}🦊", true, None);
+ check!("лиса\u{0__}", true, None);
+ check!("\\🦊\u{3_b}", true, None);
+ check!("🦊\u{1_F_6_0_2}Füchsin", true, None);
+ check!("నక్క\\\u{1_F6_02_____}నక్క", true, None);
+}
+
+#[test]
+fn string_continue() { // A backslash directly before a line break ("string continue") in non-raw strings.
+ check!("నక్క\
+ bar", true, None);
+ check!("foo\
+🦊", true, None);
+
+ check!("foo\
+
+ banana", true, None);
+
+ // Weird whitespace characters: `\n`, `\r`, `\t` and space are skipped, U+0085 and U+00A0 are kept
+ let lit = StringLit::parse("\"foo\\\n\r\t\n \n\tbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), "foobar");
+ let lit = StringLit::parse("\"foo\\\n\u{85}bar\"").expect("failed to parse");
+ assert_eq!(lit.value(), "foo\u{85}bar");
+ let lit = StringLit::parse("\"foo\\\n\u{a0}bar\"").expect("failed to parse");
+ assert_eq!(lit.value(), "foo\u{a0}bar");
+
+ // Raw strings do not handle "string continues": the backslash and line break stay in the value
+ check!(r"foo\
+ bar", false, Some(0));
+}
+
+#[test]
+fn crlf_newlines() { // `\r\n` in the input becomes `\n` in the value, for normal and raw strings alike.
+ let lit = StringLit::parse("\"foo\r\nbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), "foo\nbar");
+
+ let lit = StringLit::parse("\"\r\nbar\"").expect("failed to parse");
+ assert_eq!(lit.value(), "\nbar");
+
+ let lit = StringLit::parse("\"лиса\r\n\"").expect("failed to parse");
+ assert_eq!(lit.value(), "лиса\n");
+
+ let lit = StringLit::parse("r\"foo\r\nbar\"").expect("failed to parse"); // raw string variants from here on
+ assert_eq!(lit.value(), "foo\nbar");
+
+ let lit = StringLit::parse("r#\"\r\nbar\"#").expect("failed to parse");
+ assert_eq!(lit.value(), "\nbar");
+
+ let lit = StringLit::parse("r##\"лиса\r\n\"##").expect("failed to parse");
+ assert_eq!(lit.value(), "лиса\n");
+}
+
+#[test]
+fn raw_string() { // Raw strings: nothing is unescaped; `num_hashes` is the number of `#` in the delimiter.
+ check!(r"", false, Some(0));
+ check!(r"a", false, Some(0));
+ check!(r"peter", false, Some(0));
+ check!(r"Sei gegrüßt, Bärthelt!", false, Some(0));
+ check!(r"أنا لا أتحدث العربية", false, Some(0));
+ check!(r"お前はもう死んでいる", false, Some(0));
+ check!(r"Пушки - интересные музыкальные инструменты", false, Some(0));
+ check!(r"lit 👌 😂 af", false, Some(0));
+
+ check!(r#""#, false, Some(1));
+ check!(r#"a"#, false, Some(1));
+ check!(r##"peter"##, false, Some(2));
+ check!(r###"Sei gegrüßt, Bärthelt!"###, false, Some(3));
+ check!(r########"lit 👌 😂 af"########, false, Some(8));
+
+ check!(r#"foo " bar"#, false, Some(1));
+ check!(r##"foo " bar"##, false, Some(2));
+ check!(r#"foo """" '"'" bar"#, false, Some(1));
+ check!(r#""foo""#, false, Some(1));
+ check!(r###""foo'"###, false, Some(3));
+ check!(r#""x'#_#s'"#, false, Some(1));
+ check!(r"#", false, Some(0));
+ check!(r"foo#", false, Some(0));
+ check!(r"##bar", false, Some(0));
+ check!(r###""##foo"##bar'"###, false, Some(3));
+
+ check!(r"さび\n\t\r\0\\x60\u{123}フェリス", false, Some(0));
+ check!(r#"さび\n\t\r\0\\x60\u{123}フェリス"#, false, Some(1));
+}
+
+#[test]
+fn parse_err() { // Structural errors: missing terminators, trailing garbage, lone `\r`, broken raw-string starts.
+ assert_err!(StringLit, r#"""#, UnterminatedString, None);
+ assert_err!(StringLit, r#""犬"#, UnterminatedString, None);
+ assert_err!(StringLit, r#""Jürgen"#, UnterminatedString, None);
+ assert_err!(StringLit, r#""foo bar baz"#, UnterminatedString, None);
+
+ assert_err!(StringLit, r#""fox"peter"#, UnexpectedChar, 5..10);
+ assert_err!(StringLit, r#""fox"peter""#, UnexpectedChar, 5..11);
+ assert_err!(StringLit, r#""fox"🦊"#, UnexpectedChar, 5..9);
+ assert_err!(StringLit, r###"r#"foo "# bar"#"###, UnexpectedChar, 9..15);
+
+ assert_err!(StringLit, "\"\r\"", IsolatedCr, 1);
+ assert_err!(StringLit, "\"fo\rx\"", IsolatedCr, 3);
+ assert_err!(StringLit, "r\"\r\"", IsolatedCr, 2);
+ assert_err!(StringLit, "r\"fo\rx\"", IsolatedCr, 4);
+
+ assert_err!(StringLit, r##"r####""##, UnterminatedRawString, None);
+ assert_err!(StringLit, r#####"r##"foo"#bar"#####, UnterminatedRawString, None);
+ assert_err!(StringLit, r##"r####"##, InvalidLiteral, None);
+ assert_err!(StringLit, r##"r####x"##, InvalidLiteral, None);
+}
+
+#[test]
+fn invald_ascii_escapes() { // `\xNN` escapes with values 0x80 and above are rejected; the span covers the 4-byte escape.
+ assert_err!(StringLit, r#""\x80""#, NonAsciiXEscape, 1..5);
+ assert_err!(StringLit, r#""🦊\x81""#, NonAsciiXEscape, 5..9);
+ assert_err!(StringLit, r#"" \x8a""#, NonAsciiXEscape, 2..6);
+ assert_err!(StringLit, r#""\x8Ff""#, NonAsciiXEscape, 1..5);
+ assert_err!(StringLit, r#""\xa0 ""#, NonAsciiXEscape, 1..5);
+ assert_err!(StringLit, r#""నక్క\xB0""#, NonAsciiXEscape, 13..17);
+ assert_err!(StringLit, r#""\xc3నక్క""#, NonAsciiXEscape, 1..5);
+ assert_err!(StringLit, r#""\xDf🦊""#, NonAsciiXEscape, 1..5);
+ assert_err!(StringLit, r#""నక్క\xffనక్క""#, NonAsciiXEscape, 13..17);
+ assert_err!(StringLit, r#""\xfF ""#, NonAsciiXEscape, 1..5);
+ assert_err!(StringLit, r#"" \xFf""#, NonAsciiXEscape, 2..6);
+ assert_err!(StringLit, r#""నక్క \xFF""#, NonAsciiXEscape, 15..19);
+}
+
+#[test]
+fn invald_escapes() { // Unknown escape letters and `\x` escapes that are cut short or contain non-hex digits.
+ assert_err!(StringLit, r#""\a""#, UnknownEscape, 1..3);
+ assert_err!(StringLit, r#""foo\y""#, UnknownEscape, 4..6);
+ assert_err!(StringLit, r#""\"#, UnterminatedString, None);
+ assert_err!(StringLit, r#""\x""#, UnterminatedEscape, 1..3);
+ assert_err!(StringLit, r#""🦊\x1""#, UnterminatedEscape, 5..8);
+ assert_err!(StringLit, r#"" \xaj""#, InvalidXEscape, 2..6);
+ assert_err!(StringLit, r#""నక్క\xjb""#, InvalidXEscape, 13..17);
+}
+
+#[test]
+fn invalid_unicode_escapes() { // Malformed `\u{...}` escapes, grouped by the error kind they must produce.
+ assert_err!(StringLit, r#""\u""#, UnicodeEscapeWithoutBrace, 1..3);
+ assert_err!(StringLit, r#""🦊\u ""#, UnicodeEscapeWithoutBrace, 5..7);
+ assert_err!(StringLit, r#""\u3""#, UnicodeEscapeWithoutBrace, 1..3);
+
+ assert_err!(StringLit, r#""\u{""#, UnterminatedUnicodeEscape, 1..4);
+ assert_err!(StringLit, r#""\u{12""#, UnterminatedUnicodeEscape, 1..6);
+ assert_err!(StringLit, r#""🦊\u{a0b""#, UnterminatedUnicodeEscape, 5..11);
+ assert_err!(StringLit, r#""\u{a0_b ""#, UnterminatedUnicodeEscape, 1..10);
+
+ assert_err!(StringLit, r#""\u{_}నక్క""#, InvalidStartOfUnicodeEscape, 4);
+ assert_err!(StringLit, r#""\u{_5f}""#, InvalidStartOfUnicodeEscape, 4);
+
+ assert_err!(StringLit, r#""fox\u{x}""#, NonHexDigitInUnicodeEscape, 7);
+ assert_err!(StringLit, r#""\u{0x}🦊""#, NonHexDigitInUnicodeEscape, 5);
+ assert_err!(StringLit, r#""నక్క\u{3bx}""#, NonHexDigitInUnicodeEscape, 18);
+ assert_err!(StringLit, r#""\u{3b_x}лиса""#, NonHexDigitInUnicodeEscape, 7);
+ assert_err!(StringLit, r#""\u{4x_}""#, NonHexDigitInUnicodeEscape, 5);
+
+ assert_err!(StringLit, r#""\u{1234567}""#, TooManyDigitInUnicodeEscape, 10);
+ assert_err!(StringLit, r#""నక్క\u{1234567}🦊""#, TooManyDigitInUnicodeEscape, 22);
+ assert_err!(StringLit, r#""నక్క\u{1_23_4_56_7}""#, TooManyDigitInUnicodeEscape, 26);
+ assert_err!(StringLit, r#""\u{abcdef123}лиса""#, TooManyDigitInUnicodeEscape, 10);
+
+ assert_err!(StringLit, r#""\u{110000}fox""#, InvalidUnicodeEscapeChar, 1..10); // 0x110000 is one past the largest code point
+}
diff --git a/src/test_util.rs b/src/test_util.rs
new file mode 100644
index 0000000..dc7a1f6
--- /dev/null
+++ b/src/test_util.rs
@@ -0,0 +1,81 @@
+use crate::*;
+use std::fmt::{Debug, Display};
+
+
+#[track_caller]
+pub(crate) fn assert_parse_ok_eq<T: PartialEq + Debug + Display>( // Panics unless `result` is `Ok(expected)` and its `Display` output equals `input`.
+ input: &str,
+ result: Result<T, ParseError>,
+ expected: T,
+ parse_method: &str, // only used in the panic messages
+) {
+ match result {
+ Ok(actual) if actual == expected => {
+ if actual.to_string() != input { // formatting must round-trip to the original input
+ panic!(
+ "formatting does not yield original input `{}`: {:?}",
+ input,
+ actual,
+ );
+ }
+ }
+ Ok(actual) => { // parsed, but to a different value than expected
+ panic!(
+ "unexpected parsing result (with `{}`) for `{}`:\nactual: {:?}\nexpected: {:?}",
+ parse_method,
+ input,
+ actual,
+ expected,
+ );
+ }
+ Err(e) => {
+ panic!(
+ "expected `{}` to be parsed (with `{}`) successfully, but it failed: {:?}",
+ input,
+ parse_method,
+ e,
+ );
+ }
+ }
+}
+
+macro_rules! assert_err { // Expects the same error kind and span from both `$ty::parse` and `Literal::parse`.
+ ($ty:ident, $input:literal, $kind:ident, $( $span:tt )+ ) => {
+ assert_err_single!($ty::parse($input), $kind, $($span)+);
+ assert_err_single!($crate::Literal::parse($input), $kind, $($span)+);
+ };
+}
+
+macro_rules! assert_err_single { // Panics unless `$expr` is an `Err` with kind `$kind` and the given span.
+ ($expr:expr, $kind:ident, $( $span:tt )+ ) => {
+ let res = $expr;
+ let err = match res {
+ Err(e) => e,
+ Ok(v) => panic!(
+ "Expected `{}` to return an error, but it returned Ok({:?})",
+ stringify!($expr),
+ v,
+ ),
+ };
+ if err.kind != $crate::err::ParseErrorKind::$kind {
+ panic!(
+ "Expected error kind {} for `{}` but got {:?}",
+ stringify!($kind),
+ stringify!($expr),
+ err.kind,
+ )
+ }
+ let expected_span = assert_err_single!(@span $($span)+); // turn the span tokens into an `Option<Range>`
+ if err.span != expected_span {
+ panic!(
+ "Expected error span {:?} for `{}` but got {:?}",
+ expected_span,
+ stringify!($expr),
+ err.span,
+ )
+ }
+ };
+ (@span $start:literal .. $end:literal) => { Some($start .. $end) }; // explicit range
+ (@span $at:literal) => { Some($at.. $at + 1) }; // single index: a span of length one
+ (@span None) => { None }; // error without a span
+}
diff --git a/src/tests.rs b/src/tests.rs
new file mode 100644
index 0000000..526917e
--- /dev/null
+++ b/src/tests.rs
@@ -0,0 +1,351 @@
+use crate::Literal;
+
+
+#[test]
+fn empty() { // Empty input yields an `Empty` error without a span.
+ assert_err!(Literal, "", Empty, None);
+}
+
+#[test]
+fn invalid_literals() { // Inputs that do not start like any literal are rejected as `InvalidLiteral` without a span.
+ assert_err_single!(Literal::parse("."), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("+"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("-"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("e"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("e8"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("f32"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("foo"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("inf"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("nan"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("NaN"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("NAN"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("_2.7"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse(".5"), InvalidLiteral, None);
+}
+
+#[test]
+fn misc() { // Assorted malformed inputs, mostly digits followed by an unexpected character or suffix.
+ assert_err_single!(Literal::parse("0x44.5"), InvalidIntegerTypeSuffix, 4..6);
+ assert_err_single!(Literal::parse("a"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse(";"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("0;"), UnexpectedChar, 1);
+ assert_err_single!(Literal::parse("0a"), UnexpectedChar, 1);
+ assert_err_single!(Literal::parse("0z"), UnexpectedChar, 1);
+ assert_err_single!(Literal::parse(" 0"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("0 "), UnexpectedChar, 1);
+ assert_err_single!(Literal::parse("0a3"), UnexpectedChar, 1);
+ assert_err_single!(Literal::parse("0z3"), UnexpectedChar, 1);
+ assert_err_single!(Literal::parse("_"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("_3"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("12a3"), UnexpectedChar, 2);
+ assert_err_single!(Literal::parse("12f3"), InvalidFloatTypeSuffix, 2..4);
+ assert_err_single!(Literal::parse("12f_"), InvalidFloatTypeSuffix, 2..4);
+ assert_err_single!(Literal::parse("12F_"), UnexpectedChar, 2);
+ assert_err_single!(Literal::parse("a_123"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("B_123"), InvalidLiteral, None);
+ assert_err_single!(Literal::parse("54321a64"), UnexpectedChar, 5);
+}
+
+macro_rules! assert_no_panic { // Runs every parser on the bytes in `$input`; results are ignored, only panics matter.
+ ($input:expr) => {
+ let arr = $input;
+ let input = std::str::from_utf8(&arr).expect("not unicode");
+ let res = std::panic::catch_unwind(move || {
+ let _ = Literal::parse(input);
+ let _ = crate::BoolLit::parse(input);
+ let _ = crate::IntegerLit::parse(input);
+ let _ = crate::FloatLit::parse(input);
+ let _ = crate::CharLit::parse(input);
+ let _ = crate::StringLit::parse(input);
+ let _ = crate::ByteLit::parse(input);
+ let _ = crate::ByteStringLit::parse(input);
+ });
+
+ if let Err(e) = res { // print the offending input, then re-raise the original panic
+ println!("\n!!! panic for: {:?}", input);
+ std::panic::resume_unwind(e);
+ }
+ };
+}
+
+#[test]
+#[ignore]
+fn never_panic_up_to_3() { // Exhaustive: every input of 1 to 3 bytes with byte values 0..128 must parse without panicking.
+ for a in 0..128 {
+ assert_no_panic!([a]);
+ for b in 0..128 {
+ assert_no_panic!([a, b]);
+ for c in 0..128 {
+ assert_no_panic!([a, b, c]);
+ }
+ }
+ }
+}
+
+// This test takes super long in debug mode, but in release mode it's fine. It is `#[ignore]`d by default.
+#[test]
+#[ignore]
+fn never_panic_len_4() { // Exhaustive: every 4-byte input with byte values 0..128 must parse without panicking.
+ for a in 0..128 {
+ for b in 0..128 {
+ for c in 0..128 {
+ for d in 0..128 {
+ assert_no_panic!([a, b, c, d]);
+ }
+ }
+ }
+ }
+}
+
+#[cfg(feature = "proc-macro2")]
+#[test]
+fn proc_macro() { // Conversions from `proc_macro2` literals and token trees into this crate's literal types.
+ use std::convert::TryFrom;
+ use proc_macro2::{
+ self as pm2, TokenTree, Group, TokenStream, Delimiter, Spacing, Punct, Span, Ident,
+ };
+ use crate::{
+ BoolLit, ByteLit, ByteStringLit, CharLit, FloatLit, IntegerLit, StringLit, err::TokenKind
+ };
+
+
+ macro_rules! assert_invalid_token { // expects `$input` to be an `Err` with the given expected/actual token kinds
+ ($input:expr, expected: $expected:path, actual: $actual:path $(,)?) => {
+ let err = $input.unwrap_err();
+ if err.expected != $expected {
+ panic!("err.expected was expected to be {:?}, but is {:?}", $expected, err.expected);
+ }
+ if err.actual != $actual {
+ panic!("err.actual was expected to be {:?}, but is {:?}", $actual, err.actual);
+ }
+ };
+ }
+
+
+ let pm_u16_lit = pm2::Literal::u16_suffixed(2700); // fixtures built through proc_macro2 ...
+ let pm_i16_lit = pm2::Literal::i16_unsuffixed(3912);
+ let pm_f32_lit = pm2::Literal::f32_unsuffixed(3.14);
+ let pm_f64_lit = pm2::Literal::f64_suffixed(99.3);
+ let pm_string_lit = pm2::Literal::string("hello 🦊");
+ let pm_bytestr_lit = pm2::Literal::byte_string(b"hello \nfoxxo");
+ let pm_char_lit = pm2::Literal::character('🦀');
+
+ let u16_lit = Literal::parse("2700u16".to_string()).unwrap(); // ... and the same literals parsed from their text
+ let i16_lit = Literal::parse("3912".to_string()).unwrap();
+ let f32_lit = Literal::parse("3.14".to_string()).unwrap();
+ let f64_lit = Literal::parse("99.3f64".to_string()).unwrap();
+ let string_lit = Literal::parse(r#""hello 🦊""#.to_string()).unwrap();
+ let bytestr_lit = Literal::parse(r#"b"hello \nfoxxo""#.to_string()).unwrap();
+ let char_lit = Literal::parse("'🦀'".to_string()).unwrap();
+
+ assert_eq!(Literal::from(&pm_u16_lit), u16_lit); // `From<&pm2::Literal>` for `Literal`
+ assert_eq!(Literal::from(&pm_i16_lit), i16_lit);
+ assert_eq!(Literal::from(&pm_f32_lit), f32_lit);
+ assert_eq!(Literal::from(&pm_f64_lit), f64_lit);
+ assert_eq!(Literal::from(&pm_string_lit), string_lit);
+ assert_eq!(Literal::from(&pm_bytestr_lit), bytestr_lit);
+ assert_eq!(Literal::from(&pm_char_lit), char_lit);
+
+
+ let group = TokenTree::from(Group::new(Delimiter::Brace, TokenStream::new())); // non-literal token trees for the error cases
+ let punct = TokenTree::from(Punct::new(':', Spacing::Alone));
+ let ident = TokenTree::from(Ident::new("peter", Span::call_site()));
+
+ assert_eq!(
+ Literal::try_from(TokenTree::Literal(pm2::Literal::string("hello 🦊"))).unwrap(),
+ Literal::String(StringLit::parse(r#""hello 🦊""#.to_string()).unwrap()),
+ );
+ assert_invalid_token!(
+ Literal::try_from(punct.clone()),
+ expected: TokenKind::Literal,
+ actual: TokenKind::Punct,
+ );
+ assert_invalid_token!(
+ Literal::try_from(group.clone()),
+ expected: TokenKind::Literal,
+ actual: TokenKind::Group,
+ );
+ assert_invalid_token!(
+ Literal::try_from(ident.clone()),
+ expected: TokenKind::Literal,
+ actual: TokenKind::Ident,
+ );
+
+
+ assert_eq!(Literal::from(IntegerLit::try_from(pm_u16_lit.clone()).unwrap()), u16_lit); // `TryFrom<pm2::Literal>` for the specific literal types
+ assert_eq!(Literal::from(IntegerLit::try_from(pm_i16_lit.clone()).unwrap()), i16_lit);
+ assert_eq!(Literal::from(FloatLit::try_from(pm_f32_lit.clone()).unwrap()), f32_lit);
+ assert_eq!(Literal::from(FloatLit::try_from(pm_f64_lit.clone()).unwrap()), f64_lit);
+ assert_eq!(Literal::from(StringLit::try_from(pm_string_lit.clone()).unwrap()), string_lit);
+ assert_eq!(Literal::from(ByteStringLit::try_from(pm_bytestr_lit.clone()).unwrap()), bytestr_lit);
+ assert_eq!(Literal::from(CharLit::try_from(pm_char_lit.clone()).unwrap()), char_lit);
+
+ assert_invalid_token!(
+ StringLit::try_from(pm_u16_lit.clone()),
+ expected: TokenKind::StringLit,
+ actual: TokenKind::IntegerLit,
+ );
+ assert_invalid_token!(
+ StringLit::try_from(pm_f32_lit.clone()),
+ expected: TokenKind::StringLit,
+ actual: TokenKind::FloatLit,
+ );
+ assert_invalid_token!(
+ ByteLit::try_from(pm_bytestr_lit.clone()),
+ expected: TokenKind::ByteLit,
+ actual: TokenKind::ByteStringLit,
+ );
+ assert_invalid_token!(
+ ByteLit::try_from(pm_i16_lit.clone()),
+ expected: TokenKind::ByteLit,
+ actual: TokenKind::IntegerLit,
+ );
+ assert_invalid_token!(
+ IntegerLit::try_from(pm_string_lit.clone()),
+ expected: TokenKind::IntegerLit,
+ actual: TokenKind::StringLit,
+ );
+ assert_invalid_token!(
+ IntegerLit::try_from(pm_char_lit.clone()),
+ expected: TokenKind::IntegerLit,
+ actual: TokenKind::CharLit,
+ );
+
+
+ assert_eq!( // the same conversions again, this time starting from a `TokenTree`
+ Literal::from(IntegerLit::try_from(TokenTree::from(pm_u16_lit.clone())).unwrap()),
+ u16_lit,
+ );
+ assert_eq!(
+ Literal::from(IntegerLit::try_from(TokenTree::from(pm_i16_lit.clone())).unwrap()),
+ i16_lit,
+ );
+ assert_eq!(
+ Literal::from(FloatLit::try_from(TokenTree::from(pm_f32_lit.clone())).unwrap()),
+ f32_lit,
+ );
+ assert_eq!(
+ Literal::from(FloatLit::try_from(TokenTree::from(pm_f64_lit.clone())).unwrap()),
+ f64_lit,
+ );
+ assert_eq!(
+ Literal::from(StringLit::try_from(TokenTree::from(pm_string_lit.clone())).unwrap()),
+ string_lit,
+ );
+ assert_eq!(
+ Literal::from(ByteStringLit::try_from(TokenTree::from(pm_bytestr_lit.clone())).unwrap()),
+ bytestr_lit,
+ );
+ assert_eq!(
+ Literal::from(CharLit::try_from(TokenTree::from(pm_char_lit.clone())).unwrap()),
+ char_lit,
+ );
+
+ assert_invalid_token!(
+ StringLit::try_from(TokenTree::from(pm_u16_lit.clone())),
+ expected: TokenKind::StringLit,
+ actual: TokenKind::IntegerLit,
+ );
+ assert_invalid_token!(
+ StringLit::try_from(TokenTree::from(pm_f32_lit.clone())),
+ expected: TokenKind::StringLit,
+ actual: TokenKind::FloatLit,
+ );
+ assert_invalid_token!(
+ BoolLit::try_from(TokenTree::from(pm_bytestr_lit.clone())),
+ expected: TokenKind::BoolLit,
+ actual: TokenKind::ByteStringLit,
+ );
+ assert_invalid_token!(
+ BoolLit::try_from(TokenTree::from(pm_i16_lit.clone())),
+ expected: TokenKind::BoolLit,
+ actual: TokenKind::IntegerLit,
+ );
+ assert_invalid_token!(
+ IntegerLit::try_from(TokenTree::from(pm_string_lit.clone())),
+ expected: TokenKind::IntegerLit,
+ actual: TokenKind::StringLit,
+ );
+ assert_invalid_token!(
+ IntegerLit::try_from(TokenTree::from(pm_char_lit.clone())),
+ expected: TokenKind::IntegerLit,
+ actual: TokenKind::CharLit,
+ );
+
+ assert_invalid_token!( // non-literal token trees are reported with their own token kind
+ StringLit::try_from(TokenTree::from(group)),
+ expected: TokenKind::StringLit,
+ actual: TokenKind::Group,
+ );
+ assert_invalid_token!(
+ BoolLit::try_from(TokenTree::from(punct)),
+ expected: TokenKind::BoolLit,
+ actual: TokenKind::Punct,
+ );
+ assert_invalid_token!(
+ FloatLit::try_from(TokenTree::from(ident)),
+ expected: TokenKind::FloatLit,
+ actual: TokenKind::Ident,
+ );
+}
+
+#[cfg(feature = "proc-macro2")]
+#[test]
+fn bool_try_from_tt() { // Only the exact identifiers `true` and `false` convert into a `BoolLit`.
+ use std::convert::TryFrom;
+ use proc_macro2::{Ident, Span, TokenTree};
+ use crate::BoolLit;
+
+
+ let ident = |s: &str| Ident::new(s, Span::call_site()); // shorthand for building identifier tokens
+
+ assert_eq!(BoolLit::try_from(TokenTree::Ident(ident("true"))).unwrap(), BoolLit::True);
+ assert_eq!(BoolLit::try_from(TokenTree::Ident(ident("false"))).unwrap(), BoolLit::False);
+
+ assert!(BoolLit::try_from(TokenTree::Ident(ident("falsex"))).is_err());
+ assert!(BoolLit::try_from(TokenTree::Ident(ident("_false"))).is_err());
+ assert!(BoolLit::try_from(TokenTree::Ident(ident("False"))).is_err());
+ assert!(BoolLit::try_from(TokenTree::Ident(ident("True"))).is_err());
+ assert!(BoolLit::try_from(TokenTree::Ident(ident("ltrue"))).is_err());
+
+
+ assert_eq!( // the same checks through the general `Literal` type
+ Literal::try_from(TokenTree::Ident(ident("true"))).unwrap(),
+ Literal::Bool(BoolLit::True),
+ );
+ assert_eq!(
+ Literal::try_from(TokenTree::Ident(ident("false"))).unwrap(),
+ Literal::Bool(BoolLit::False),
+ );
+
+ assert!(Literal::try_from(TokenTree::Ident(ident("falsex"))).is_err());
+ assert!(Literal::try_from(TokenTree::Ident(ident("_false"))).is_err());
+ assert!(Literal::try_from(TokenTree::Ident(ident("False"))).is_err());
+ assert!(Literal::try_from(TokenTree::Ident(ident("True"))).is_err());
+ assert!(Literal::try_from(TokenTree::Ident(ident("ltrue"))).is_err());
+}
+
+#[cfg(feature = "proc-macro2")]
+#[test]
+fn invalid_token_display() { // Pins the exact `Display` text of `InvalidToken` for two expected/actual combinations.
+ use crate::{InvalidToken, err::TokenKind};
+
+ let span = crate::err::Span::Two(proc_macro2::Span::call_site());
+ assert_eq!(
+ InvalidToken {
+ actual: TokenKind::StringLit,
+ expected: TokenKind::FloatLit,
+ span,
+ }.to_string(),
+ r#"expected a float literal (e.g. `3.14`), but found a string literal (e.g. "Ferris")"#,
+ );
+
+ assert_eq!(
+ InvalidToken {
+ actual: TokenKind::Punct,
+ expected: TokenKind::Literal,
+ span,
+ }.to_string(),
+ r#"expected a literal, but found a punctuation character"#,
+ );
+}