summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-02-18 08:41:16 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-02-18 08:41:16 +0000
commit17ffee4c9e9f6277d67f0c18315194ab8f655d3b (patch)
treed618c4f057d10b639bbda658608ddc47f7613b9c
parentc9dc7a33677efefec82548679d4e744b256b997d (diff)
parent69d145f5252d6dfff0f93f2eb778b9f431107387 (diff)
downloadpest-android14-d1-release.tar.gz
Change-Id: I4a2253304e82188c08c5c607a7da1e47aba06f0e
-rw-r--r--Android.bp2
-rw-r--r--Cargo.lock17
-rw-r--r--Cargo.toml10
-rw-r--r--Cargo.toml.orig6
-rw-r--r--METADATA6
-rw-r--r--src/iterators/flat_pairs.rs22
-rw-r--r--src/iterators/line_index.rs91
-rw-r--r--src/iterators/mod.rs1
-rw-r--r--src/iterators/pair.rs28
-rw-r--r--src/iterators/pairs.rs90
-rw-r--r--src/parser_state.rs2
-rw-r--r--src/position.rs110
12 files changed, 207 insertions, 178 deletions
diff --git a/Android.bp b/Android.bp
index 58283e0..d49f319 100644
--- a/Android.bp
+++ b/Android.bp
@@ -41,7 +41,7 @@ rust_library_host {
name: "libpest",
crate_name: "pest",
cargo_env_compat: true,
- cargo_pkg_version: "2.5.4",
+ cargo_pkg_version: "2.5.5",
srcs: ["src/lib.rs"],
edition: "2021",
features: [
diff --git a/Cargo.lock b/Cargo.lock
index 5741be3..053e07f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,12 +3,6 @@
version = 3
[[package]]
-name = "bytecount"
-version = "0.6.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
-
-[[package]]
name = "itoa"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -22,9 +16,8 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "pest"
-version = "2.5.4"
+version = "2.5.5"
dependencies = [
- "bytecount",
"memchr",
"serde",
"serde_json",
@@ -34,9 +27,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.50"
+version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2"
+checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
dependencies = [
"unicode-ident",
]
@@ -64,9 +57,9 @@ checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
[[package]]
name = "serde_json"
-version = "1.0.91"
+version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
+checksum = "7434af0dc1cbd59268aa98b4c22c131c0584d2232f6fb166efb993e2832e896a"
dependencies = [
"itoa",
"ryu",
diff --git a/Cargo.toml b/Cargo.toml
index bf8e4d5..3c1f26d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
edition = "2021"
rust-version = "1.56"
name = "pest"
-version = "2.5.4"
+version = "2.5.5"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
description = "The Elegant Parser"
homepage = "https://pest.rs/"
@@ -29,10 +29,6 @@ categories = ["parsing"]
license = "MIT/Apache-2.0"
repository = "https://github.com/pest-parser/pest"
-[dependencies.bytecount]
-version = "0.6"
-optional = true
-
[dependencies.memchr]
version = "2"
optional = true
@@ -56,10 +52,6 @@ default-features = false
[features]
const_prec_climber = []
default = ["std"]
-fast-line-col = [
- "memchr",
- "bytecount",
-]
pretty-print = [
"serde",
"serde_json",
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 7ea229f..a268321 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,7 +1,7 @@
[package]
name = "pest"
description = "The Elegant Parser"
-version = "2.5.4"
+version = "2.5.5"
edition = "2021"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest.rs/"
@@ -21,9 +21,6 @@ std = ["ucd-trie/std", "thiserror"]
pretty-print = ["serde", "serde_json"]
# Enable const fn constructor for `PrecClimber`
const_prec_climber = []
-# Enable faster `Position::line_col` calculation using SIMD
-# (note that this may have extra overhead for small inputs)
-fast-line-col = ["memchr", "bytecount"]
[dependencies]
ucd-trie = { version = "0.1.5", default-features = false }
@@ -31,4 +28,3 @@ serde = { version = "1.0.145", optional = true }
serde_json = { version = "1.0.85", optional = true}
thiserror = { version = "1.0.37", optional = true }
memchr = { version = "2", optional = true }
-bytecount = { version = "0.6", optional = true }
diff --git a/METADATA b/METADATA
index aa66385..ce195cf 100644
--- a/METADATA
+++ b/METADATA
@@ -11,13 +11,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/pest/pest-2.5.4.crate"
+ value: "https://static.crates.io/crates/pest/pest-2.5.5.crate"
}
- version: "2.5.4"
+ version: "2.5.5"
license_type: NOTICE
last_upgrade_date {
year: 2023
month: 2
- day: 3
+ day: 16
}
}
diff --git a/src/iterators/flat_pairs.rs b/src/iterators/flat_pairs.rs
index 411d88b..52a2074 100644
--- a/src/iterators/flat_pairs.rs
+++ b/src/iterators/flat_pairs.rs
@@ -11,6 +11,7 @@ use alloc::rc::Rc;
use alloc::vec::Vec;
use core::fmt;
+use super::line_index::LineIndex;
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
@@ -28,6 +29,7 @@ pub struct FlatPairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
+ line_index: Rc<LineIndex>,
}
/// # Safety
@@ -42,6 +44,7 @@ pub unsafe fn new<R: RuleType>(
FlatPairs {
queue,
input,
+ line_index: Rc::new(LineIndex::new(input)),
start,
end,
}
@@ -107,7 +110,14 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
return None;
}
- let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) };
+ let pair = unsafe {
+ pair::new(
+ Rc::clone(&self.queue),
+ self.input,
+ Rc::clone(&self.line_index),
+ self.start,
+ )
+ };
self.next_start();
Some(pair)
@@ -122,7 +132,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {
self.next_start_from_end();
- let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
+ let pair = unsafe {
+ pair::new(
+ Rc::clone(&self.queue),
+ self.input,
+ Rc::clone(&self.line_index),
+ self.end,
+ )
+ };
Some(pair)
}
@@ -141,6 +158,7 @@ impl<'i, R: Clone> Clone for FlatPairs<'i, R> {
FlatPairs {
queue: Rc::clone(&self.queue),
input: self.input,
+ line_index: Rc::clone(&self.line_index),
start: self.start,
end: self.end,
}
diff --git a/src/iterators/line_index.rs b/src/iterators/line_index.rs
new file mode 100644
index 0000000..54871e1
--- /dev/null
+++ b/src/iterators/line_index.rs
@@ -0,0 +1,91 @@
+//! `LineIndex` builds a table of `line_offsets`; each item is a byte offset (starting from 0) of the beginning of a line.
+//!
+//! For example, for the text `"hello 你好\nworld"`, the `line_offsets` will store `[0, 13]`.
+//!
+//! Then `line_col` for a given offset just needs to find the line index by binary search.
+//!
+//! Inspired by rust-analyzer's `LineIndex`:
+//! <https://github.com/rust-lang/rust/blob/1.67.0/src/tools/rust-analyzer/crates/ide-db/src/line_index.rs>
+use alloc::vec::Vec;
+
+#[derive(Clone)]
+pub struct LineIndex {
+ /// Offset (in bytes) of the beginning of each line, zero-based
+ line_offsets: Vec<usize>,
+}
+
+impl LineIndex {
+ pub fn new(text: &str) -> LineIndex {
+ let mut line_offsets: Vec<usize> = alloc::vec![0];
+
+ let mut offset = 0;
+
+ for c in text.chars() {
+ offset += c.len_utf8();
+ if c == '\n' {
+ line_offsets.push(offset);
+ }
+ }
+
+ LineIndex { line_offsets }
+ }
+
+ /// Returns (line, col) of pos.
+ ///
+ /// The pos is a byte offset, starting from 0, e.g. "ab" is 2, "你好" is 6
+ pub fn line_col(&self, input: &str, pos: usize) -> (usize, usize) {
+ let line = self.line_offsets.partition_point(|&it| it <= pos) - 1;
+ let first_offset = self.line_offsets[line];
+
+ // Get line str from original input, then we can get column offset
+ let line_str = &input[first_offset..pos];
+ let col = line_str.chars().count();
+
+ (line + 1, col + 1)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[allow(clippy::zero_prefixed_literal)]
+ #[test]
+ fn test_line_index() {
+ let text = "hello 你好 A🎈C\nworld";
+ let table = [
+ (00, 1, 1, 'h'),
+ (01, 1, 2, 'e'),
+ (02, 1, 3, 'l'),
+ (03, 1, 4, 'l'),
+ (04, 1, 5, 'o'),
+ (05, 1, 6, ' '),
+ (06, 1, 7, '你'),
+ (09, 1, 8, '好'),
+ (12, 1, 9, ' '),
+ (13, 1, 10, 'A'),
+ (14, 1, 11, '🎈'),
+ (18, 1, 12, 'C'),
+ (19, 1, 13, '\n'),
+ (20, 2, 1, 'w'),
+ (21, 2, 2, 'o'),
+ (22, 2, 3, 'r'),
+ (23, 2, 4, 'l'),
+ (24, 2, 5, 'd'),
+ ];
+
+ let index = LineIndex::new(text);
+ for &(offset, line, col, c) in table.iter() {
+ let res = index.line_col(text, offset);
+ assert_eq!(
+ (res.0, res.1),
+ (line, col),
+ "Expected: ({}, {}, {}, {:?})",
+ offset,
+ line,
+ col,
+ c
+ );
+ }
+ }
+}
diff --git a/src/iterators/mod.rs b/src/iterators/mod.rs
index 1a78963..7f81019 100644
--- a/src/iterators/mod.rs
+++ b/src/iterators/mod.rs
@@ -10,6 +10,7 @@
//! Types and iterators for parser output.
mod flat_pairs;
+mod line_index;
mod pair;
pub(crate) mod pairs;
mod queueable_token;
diff --git a/src/iterators/pair.rs b/src/iterators/pair.rs
index 2c81347..891b905 100644
--- a/src/iterators/pair.rs
+++ b/src/iterators/pair.rs
@@ -20,6 +20,7 @@ use core::str;
#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;
+use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
@@ -43,7 +44,7 @@ pub struct Pair<'i, R> {
input: &'i str,
/// Token index into `queue`.
start: usize,
- pub(crate) line_col: Option<(usize, usize)>,
+ line_index: Rc<LineIndex>,
}
/// # Safety
@@ -52,13 +53,14 @@ pub struct Pair<'i, R> {
pub unsafe fn new<R: RuleType>(
queue: Rc<Vec<QueueableToken<R>>>,
input: &str,
+ line_index: Rc<LineIndex>,
start: usize,
) -> Pair<'_, R> {
Pair {
queue,
input,
start,
- line_col: None,
+ line_index,
}
}
@@ -204,7 +206,13 @@ impl<'i, R: RuleType> Pair<'i, R> {
pub fn into_inner(self) -> Pairs<'i, R> {
let pair = self.pair();
- pairs::new(self.queue, self.input, self.start + 1, pair)
+ pairs::new(
+ self.queue,
+ self.input,
+ Some(self.line_index),
+ self.start + 1,
+ pair,
+ )
}
/// Returns the `Tokens` for the `Pair`.
@@ -245,10 +253,8 @@ impl<'i, R: RuleType> Pair<'i, R> {
/// Returns the `line`, `col` of this pair start.
pub fn line_col(&self) -> (usize, usize) {
- match &self.line_col {
- Some(line_col) => (line_col.0, line_col.1),
- None => self.as_span().start_pos().line_col(),
- }
+ let pos = self.pos(self.start);
+ self.line_index.line_col(self.input, pos)
}
fn pair(&self) -> usize {
@@ -273,7 +279,13 @@ impl<'i, R: RuleType> Pairs<'i, R> {
/// Create a new `Pairs` iterator containing just the single `Pair`.
pub fn single(pair: Pair<'i, R>) -> Self {
let end = pair.pair();
- pairs::new(pair.queue, pair.input, pair.start, end)
+ pairs::new(
+ pair.queue,
+ pair.input,
+ Some(pair.line_index),
+ pair.start,
+ end,
+ )
}
}
diff --git a/src/iterators/pairs.rs b/src/iterators/pairs.rs
index d4596b0..c21a7fa 100644
--- a/src/iterators/pairs.rs
+++ b/src/iterators/pairs.rs
@@ -20,27 +20,12 @@ use core::str;
use serde::ser::SerializeStruct;
use super::flat_pairs::{self, FlatPairs};
+use super::line_index::LineIndex;
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
-use crate::{position, RuleType};
+use crate::RuleType;
-#[derive(Clone)]
-pub struct Cursor {
- pub line: usize,
- pub col: usize,
- pub end: usize,
-}
-
-impl Default for Cursor {
- fn default() -> Cursor {
- Cursor {
- line: 1,
- col: 1,
- end: 0,
- }
- }
-}
/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
@@ -52,21 +37,27 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
- cursor: Cursor,
+ line_index: Rc<LineIndex>,
}
pub fn new<R: RuleType>(
queue: Rc<Vec<QueueableToken<R>>>,
input: &str,
+ line_index: Option<Rc<LineIndex>>,
start: usize,
end: usize,
) -> Pairs<'_, R> {
+ let line_index = match line_index {
+ Some(line_index) => line_index,
+ None => Rc::new(LineIndex::new(input)),
+ };
+
Pairs {
queue,
input,
start,
end,
- cursor: Cursor::default(),
+ line_index,
}
}
@@ -199,7 +190,14 @@ impl<'i, R: RuleType> Pairs<'i, R> {
#[inline]
pub fn peek(&self) -> Option<Pair<'i, R>> {
if self.start < self.end {
- Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) })
+ Some(unsafe {
+ pair::new(
+ Rc::clone(&self.queue),
+ self.input,
+ Rc::clone(&self.line_index),
+ self.start,
+ )
+ })
} else {
None
}
@@ -237,42 +235,13 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
}
-
- /// Move the cursor (line, col) by a part of the input.
- fn move_cursor(&mut self, input: &str, start: usize, end: usize) -> (usize, usize) {
- // Move cursor for some skiped characters (by skip(n))
- let prev_end = self.cursor.end;
- if prev_end != start {
- self.move_cursor(input, prev_end, start);
- }
-
- let (prev_line, prev_col) = (self.cursor.line, self.cursor.col);
-
- let part = &input[self.cursor.end..end];
- let (l, c) = position::line_col(part, part.len(), (0, 0));
-
- self.cursor.line += l;
- // Has new line
- if l > 0 {
- self.cursor.col = c;
- } else {
- self.cursor.col += c;
- }
- self.cursor.end = end;
-
- (prev_line, prev_col)
- }
}
impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;
fn next(&mut self) -> Option<Self::Item> {
- let mut pair = self.peek()?;
- let span = pair.as_span();
-
- let (l, c) = self.move_cursor(self.input, span.start(), span.end());
- pair.line_col = Some((l, c));
+ let pair = self.peek()?;
self.start = self.pair() + 1;
Some(pair)
@@ -287,7 +256,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
self.end = self.pair_from_end();
- let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
+ let pair = unsafe {
+ pair::new(
+ Rc::clone(&self.queue),
+ self.input,
+ Rc::clone(&self.line_index),
+ self.end,
+ )
+ };
Some(pair)
}
@@ -478,26 +454,14 @@ mod tests {
let pair = pairs.next().unwrap();
assert_eq!(pair.as_str(), "abc");
assert_eq!(pair.line_col(), (1, 1));
- assert_eq!(
- (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
- (1, 4, 3)
- );
let pair = pairs.next().unwrap();
assert_eq!(pair.as_str(), "e");
assert_eq!(pair.line_col(), (2, 1));
- assert_eq!(
- (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
- (2, 2, 5)
- );
let pair = pairs.next().unwrap();
assert_eq!(pair.as_str(), "fgh");
assert_eq!(pair.line_col(), (2, 2));
- assert_eq!(
- (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
- (2, 5, 8)
- );
}
#[test]
diff --git a/src/parser_state.rs b/src/parser_state.rs
index 609de55..f58de00 100644
--- a/src/parser_state.rs
+++ b/src/parser_state.rs
@@ -157,7 +157,7 @@ where
match f(state) {
Ok(state) => {
let len = state.queue.len();
- Ok(pairs::new(Rc::new(state.queue), input, 0, len))
+ Ok(pairs::new(Rc::new(state.queue), input, None, 0, len))
}
Err(mut state) => {
let variant = if state.reached_call_limit() {
diff --git a/src/position.rs b/src/position.rs
index b7b3c10..465ff97 100644
--- a/src/position.rs
+++ b/src/position.rs
@@ -138,8 +138,43 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}
+ let mut pos = self.pos;
+ let slice = &self.input[..pos];
+ let mut chars = slice.chars().peekable();
+
+ let mut line_col = (1, 1);
+
+ while pos != 0 {
+ match chars.next() {
+ Some('\r') => {
+ if let Some(&'\n') = chars.peek() {
+ chars.next();
+
+ if pos == 1 {
+ pos -= 1;
+ } else {
+ pos -= 2;
+ }
+
+ line_col = (line_col.0 + 1, 1);
+ } else {
+ pos -= 1;
+ line_col = (line_col.0, line_col.1 + 1);
+ }
+ }
+ Some('\n') => {
+ pos -= 1;
+ line_col = (line_col.0 + 1, 1);
+ }
+ Some(c) => {
+ pos -= c.len_utf8();
+ line_col = (line_col.0, line_col.1 + 1);
+ }
+ None => unreachable!(),
+ }
+ }
- line_col(self.input, self.pos, (1, 1))
+ line_col
}
/// Returns the entire line of the input that contains this `Position`.
@@ -452,79 +487,6 @@ impl<'i> Hash for Position<'i> {
}
}
-/// Returns the line and column of the given `pos` in `input`.
-pub(crate) fn line_col(input: &str, pos: usize, start: (usize, usize)) -> (usize, usize) {
- #[cfg(feature = "fast-line-col")]
- {
- fast_line_col(input, pos, start)
- }
- #[cfg(not(feature = "fast-line-col"))]
- {
- original_line_col(input, pos, start)
- }
-}
-
-#[inline]
-#[cfg(not(feature = "fast-line-col"))]
-pub(crate) fn original_line_col(
- input: &str,
- mut pos: usize,
- start: (usize, usize),
-) -> (usize, usize) {
- // Position's pos is always a UTF-8 border.
- let slice = &input[..pos];
- let mut chars = slice.chars().peekable();
-
- let mut line_col = start;
-
- while pos != 0 {
- match chars.next() {
- Some('\r') => {
- if let Some(&'\n') = chars.peek() {
- chars.next();
-
- if pos == 1 {
- pos -= 1;
- } else {
- pos -= 2;
- }
-
- line_col = (line_col.0 + 1, 1);
- } else {
- pos -= 1;
- line_col = (line_col.0, line_col.1 + 1);
- }
- }
- Some('\n') => {
- pos -= 1;
- line_col = (line_col.0 + 1, 1);
- }
- Some(c) => {
- pos -= c.len_utf8();
- line_col = (line_col.0, line_col.1 + 1);
- }
- None => unreachable!(),
- }
- }
-
- line_col
-}
-
-#[inline]
-#[cfg(feature = "fast-line-col")]
-fn fast_line_col(input: &str, pos: usize, start: (usize, usize)) -> (usize, usize) {
- // Position's pos is always a UTF-8 border.
- let slice = &input[..pos];
-
- let prec_ln = memchr::memrchr(b'\n', slice.as_bytes());
- if let Some(prec_nl_pos) = prec_ln {
- let lines = bytecount::count(slice[..=prec_nl_pos].as_bytes(), b'\n') + start.0;
- (lines, slice[prec_nl_pos..].chars().count())
- } else {
- (start.0, slice.chars().count() + start.1)
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;