diff options
Diffstat (limited to 'src/combinator')
-rw-r--r-- | src/combinator/branch.rs | 36 | ||||
-rw-r--r-- | src/combinator/core.rs | 28 | ||||
-rw-r--r-- | src/combinator/debug/internals.rs | 301 | ||||
-rw-r--r-- | src/combinator/debug/mod.rs | 91 | ||||
-rw-r--r-- | src/combinator/mod.rs | 110 | ||||
-rw-r--r-- | src/combinator/multi.rs | 1061 | ||||
-rw-r--r-- | src/combinator/parser.rs | 252 | ||||
-rw-r--r-- | src/combinator/sequence.rs | 13 | ||||
-rw-r--r-- | src/combinator/tests.rs | 109 |
9 files changed, 1617 insertions, 384 deletions
diff --git a/src/combinator/branch.rs b/src/combinator/branch.rs index b909ff1..7fdcf8d 100644 --- a/src/combinator/branch.rs +++ b/src/combinator/branch.rs @@ -1,6 +1,6 @@ +use crate::combinator::trace; use crate::error::{ErrMode, ErrorKind, ParserError}; use crate::stream::Stream; -use crate::trace::trace; use crate::*; #[doc(inline)] @@ -16,7 +16,10 @@ pub trait Alt<I, O, E> { /// Pick the first successful parser /// -/// For tight control over the error, add a final case using [`fail`][crate::combinator::fail]. +/// To stop on an error, rather than trying further cases, see +/// [`cut_err`][crate::combinator::cut_err] ([example][crate::_tutorial::chapter_6]). +/// +/// For tight control over the error when no match is found, add a final case using [`fail`][crate::combinator::fail]. /// Alternatively, with a [custom error type][crate::_topic::error], it is possible to track all /// errors or return the error of the parser that went the farthest in the input data. /// @@ -65,6 +68,11 @@ pub trait Permutation<I, O, E> { /// It takes as argument a tuple of parsers, and returns a /// tuple of the parser results. /// +/// To stop on an error, rather than trying further permutations, see +/// [`cut_err`][crate::combinator::cut_err] ([example][crate::_tutorial::chapter_6]). +/// +/// # Example +/// /// ```rust /// # use winnow::{error::ErrMode,error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; @@ -170,6 +178,30 @@ macro_rules! alt_trait_impl( ); ); +macro_rules! succ ( + (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); + (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); + (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); + (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); + (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); + (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); + (6, $submac:ident ! 
($($rest:tt)*)) => ($submac!(7, $($rest)*)); + (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); + (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); + (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); + (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); + (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); + (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); + (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); + (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); + (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); + (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); + (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); + (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); + (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); + (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); +); + macro_rules! alt_trait_inner( ($it:tt, $self:expr, $input:expr, $start:ident, $err:expr, $head:ident $($id:ident)+) => ({ $input.reset($start.clone()); diff --git a/src/combinator/core.rs b/src/combinator/core.rs index d784b4e..efd7758 100644 --- a/src/combinator/core.rs +++ b/src/combinator/core.rs @@ -1,6 +1,6 @@ +use crate::combinator::trace; use crate::error::{ErrMode, ErrorKind, Needed, ParserError}; use crate::stream::Stream; -use crate::trace::trace; use crate::*; /// Return the remaining input. @@ -225,6 +225,8 @@ where /// This commits the parse result, preventing alternative branch paths like with /// [`winnow::combinator::alt`][crate::combinator::alt]. /// +/// See the [tutorial][crate::_tutorial::chapter_6] for more details. +/// /// # Example /// /// Without `cut_err`: @@ -331,7 +333,7 @@ where /// Call the iterator's [`ParserIterator::finish`] method to get the remaining input if successful, /// or the error value if we encountered an error. 
/// -/// On [`ErrMode::Backtrack`], iteration will stop. To instead chain an error up, see [`cut_err`]. +/// On [`ErrMode::Backtrack`], iteration will stop. To instead chain an error up, see [`cut_err`]. /// /// # Example /// @@ -432,11 +434,16 @@ enum State<E> { Incomplete(Needed), } -/// Always succeeds with given value without consuming any input. +/// Succeed, consuming no input /// /// For example, it can be used as the last alternative in `alt` to /// specify the default case. /// +/// Useful with: +/// - [`Parser::value`] +/// - [`Parser::default_value`] +/// - [`Parser::map`] +/// /// **Note:** This never advances the [`Stream`] /// /// # Example @@ -445,16 +452,13 @@ enum State<E> { /// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; /// # use winnow::prelude::*; /// use winnow::combinator::alt; -/// use winnow::combinator::success; -/// -/// let mut parser = success::<_,_,InputError<_>>(10); -/// assert_eq!(parser.parse_peek("xyz"), Ok(("xyz", 10))); +/// use winnow::combinator::empty; /// /// fn sign(input: &str) -> IResult<&str, isize> { /// alt(( /// '-'.value(-1), /// '+'.value(1), -/// success::<_,_,InputError<_>>(1) +/// empty.value(1) /// )).parse_peek(input) /// } /// assert_eq!(sign("+10"), Ok(("10", 1))); @@ -462,7 +466,13 @@ enum State<E> { /// assert_eq!(sign("10"), Ok(("10", 1))); /// ``` #[doc(alias = "value")] -#[doc(alias = "empty")] +#[doc(alias = "success")] +pub fn empty<I: Stream, E: ParserError<I>>(_input: &mut I) -> PResult<(), E> { + Ok(()) +} + +/// Deprecated, replaced with [`empty`] + [`Parser::value`] +#[deprecated(since = "0.5.35", note = "Replaced with empty.value(...)`")] pub fn success<I: Stream, O: Clone, E: ParserError<I>>(val: O) -> impl Parser<I, O, E> { trace("success", move |_input: &mut I| Ok(val.clone())) } diff --git a/src/combinator/debug/internals.rs b/src/combinator/debug/internals.rs new file mode 100644 index 0000000..c38b11e --- /dev/null +++ b/src/combinator/debug/internals.rs @@ -0,0 
+1,301 @@ +#![cfg(feature = "std")] + +use std::io::Write; + +use crate::error::ErrMode; +use crate::stream::Stream; +use crate::*; + +pub struct Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + parser: P, + name: D, + call_count: usize, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<P, D, I, O, E> Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + #[inline(always)] + pub fn new(parser: P, name: D) -> Self { + Self { + parser, + name, + call_count: 0, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<P, D, I, O, E> Parser<I, O, E> for Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + let depth = Depth::new(); + let original = i.checkpoint(); + start(*depth, &self.name, self.call_count, i); + + let res = self.parser.parse_next(i); + + let consumed = i.offset_from(&original); + let severity = Severity::with_result(&res); + end(*depth, &self.name, self.call_count, consumed, severity); + self.call_count += 1; + + res + } +} + +pub struct Depth { + depth: usize, + inc: bool, +} + +impl Depth { + pub fn new() -> Self { + let depth = DEPTH.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + let inc = true; + Self { depth, inc } + } + + pub fn existing() -> Self { + let depth = DEPTH.load(std::sync::atomic::Ordering::SeqCst); + let inc = false; + Self { depth, inc } + } +} + +impl Drop for Depth { + fn drop(&mut self) { + if self.inc { + let _ = DEPTH.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); + } + } +} + +impl AsRef<usize> for Depth { + #[inline(always)] + fn as_ref(&self) -> &usize { + &self.depth + } +} + +impl crate::lib::std::ops::Deref for Depth { + type Target = usize; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.depth + } +} + 
+static DEPTH: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); + +pub enum Severity { + Success, + Backtrack, + Cut, + Incomplete, +} + +impl Severity { + pub fn with_result<T, E>(result: &Result<T, ErrMode<E>>) -> Self { + match result { + Ok(_) => Self::Success, + Err(ErrMode::Backtrack(_)) => Self::Backtrack, + Err(ErrMode::Cut(_)) => Self::Cut, + Err(ErrMode::Incomplete(_)) => Self::Incomplete, + } + } +} + +pub fn start<I: Stream>( + depth: usize, + name: &dyn crate::lib::std::fmt::Display, + count: usize, + input: &I, +) { + let gutter_style = anstyle::Style::new().bold(); + let input_style = anstyle::Style::new().underline(); + let eof_style = anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Cyan.into())); + + let (call_width, input_width) = column_widths(); + + let count = if 0 < count { + format!(":{count}") + } else { + "".to_owned() + }; + let call_column = format!("{:depth$}> {name}{count}", ""); + + // The debug version of `slice` might be wider, either due to rendering one byte as two nibbles or + // escaping in strings. 
+ let mut debug_slice = format!("{:#?}", input.raw()); + let (debug_slice, eof) = if let Some(debug_offset) = debug_slice + .char_indices() + .enumerate() + .find_map(|(pos, (offset, _))| (input_width <= pos).then_some(offset)) + { + debug_slice.truncate(debug_offset); + let eof = ""; + (debug_slice, eof) + } else { + let eof = if debug_slice.chars().count() < input_width { + "∅" + } else { + "" + }; + (debug_slice, eof) + }; + + let writer = anstream::stderr(); + let mut writer = writer.lock(); + let _ = writeln!( + writer, + "{call_column:call_width$} {gutter_style}|{gutter_reset} {input_style}{debug_slice}{input_reset}{eof_style}{eof}{eof_reset}", + gutter_style=gutter_style.render(), + gutter_reset=gutter_style.render_reset(), + input_style=input_style.render(), + input_reset=input_style.render_reset(), + eof_style=eof_style.render(), + eof_reset=eof_style.render_reset(), + ); +} + +pub fn end( + depth: usize, + name: &dyn crate::lib::std::fmt::Display, + count: usize, + consumed: usize, + severity: Severity, +) { + let gutter_style = anstyle::Style::new().bold(); + + let (call_width, _) = column_widths(); + + let count = if 0 < count { + format!(":{count}") + } else { + "".to_owned() + }; + let call_column = format!("{:depth$}< {name}{count}", ""); + + let (status_style, status) = match severity { + Severity::Success => { + let style = anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Green.into())); + let status = format!("+{}", consumed); + (style, status) + } + Severity::Backtrack => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Yellow.into())), + "backtrack".to_owned(), + ), + Severity::Cut => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), + "cut".to_owned(), + ), + Severity::Incomplete => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), + "incomplete".to_owned(), + ), + }; + + let writer = anstream::stderr(); + let mut writer = writer.lock(); + let _ = writeln!( + writer, + 
"{status_style}{call_column:call_width$}{status_reset} {gutter_style}|{gutter_reset} {status_style}{status}{status_reset}", + gutter_style=gutter_style.render(), + gutter_reset=gutter_style.render_reset(), + status_style=status_style.render(), + status_reset=status_style.render_reset(), + ); +} + +pub fn result(depth: usize, name: &dyn crate::lib::std::fmt::Display, severity: Severity) { + let gutter_style = anstyle::Style::new().bold(); + + let (call_width, _) = column_widths(); + + let call_column = format!("{:depth$}| {name}", ""); + + let (status_style, status) = match severity { + Severity::Success => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Green.into())), + "", + ), + Severity::Backtrack => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Yellow.into())), + "backtrack", + ), + Severity::Cut => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), + "cut", + ), + Severity::Incomplete => ( + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), + "incomplete", + ), + }; + + let writer = anstream::stderr(); + let mut writer = writer.lock(); + let _ = writeln!( + writer, + "{status_style}{call_column:call_width$}{status_reset} {gutter_style}|{gutter_reset} {status_style}{status}{status_reset}", + gutter_style=gutter_style.render(), + gutter_reset=gutter_style.render_reset(), + status_style=status_style.render(), + status_reset=status_style.render_reset(), + ); +} + +fn column_widths() -> (usize, usize) { + let term_width = term_width(); + + let min_call_width = 40; + let min_input_width = 20; + let decor_width = 3; + let extra_width = term_width + .checked_sub(min_call_width + min_input_width + decor_width) + .unwrap_or_default(); + let call_width = min_call_width + 2 * extra_width / 3; + let input_width = min_input_width + extra_width / 3; + + (call_width, input_width) +} + +fn term_width() -> usize { + columns_env().or_else(query_width).unwrap_or(80) +} + +fn query_width() -> Option<usize> { + 
use is_terminal::IsTerminal; + if std::io::stderr().is_terminal() { + terminal_size::terminal_size().map(|(w, _h)| w.0.into()) + } else { + None + } +} + +fn columns_env() -> Option<usize> { + std::env::var("COLUMNS") + .ok() + .and_then(|c| c.parse::<usize>().ok()) +} diff --git a/src/combinator/debug/mod.rs b/src/combinator/debug/mod.rs new file mode 100644 index 0000000..ee4c293 --- /dev/null +++ b/src/combinator/debug/mod.rs @@ -0,0 +1,91 @@ +#![cfg_attr(feature = "debug", allow(clippy::std_instead_of_core))] + +#[cfg(feature = "debug")] +mod internals; + +use crate::error::ErrMode; +use crate::stream::Stream; +use crate::Parser; + +#[cfg(all(feature = "debug", not(feature = "std")))] +compile_error!("`debug` requires `std`"); + +/// Trace the execution of the parser +/// +/// Note that [`Parser::context`] also provides high level trace information. +/// +/// See [tutorial][crate::_tutorial::chapter_8] for more details. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::token::take_while; +/// # use winnow::stream::AsChar; +/// # use winnow::prelude::*; +/// use winnow::combinator::trace; +/// +/// fn short_alpha<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { +/// trace("short_alpha", +/// take_while(3..=6, AsChar::is_alpha) +/// ).parse_next(s) +/// } +/// +/// assert_eq!(short_alpha.parse_peek(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(short_alpha.parse_peek(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); +/// assert_eq!(short_alpha.parse_peek(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(short_alpha.parse_peek(b"ed"), Err(ErrMode::Backtrack(InputError::new(&b"ed"[..], ErrorKind::Slice)))); +/// assert_eq!(short_alpha.parse_peek(b"12345"), Err(ErrMode::Backtrack(InputError::new(&b"12345"[..], ErrorKind::Slice)))); +/// ``` +#[cfg_attr(not(feature = "debug"), allow(unused_variables))] +#[cfg_attr(not(feature = 
"debug"), allow(unused_mut))] +#[cfg_attr(not(feature = "debug"), inline(always))] +pub fn trace<I: Stream, O, E>( + name: impl crate::lib::std::fmt::Display, + parser: impl Parser<I, O, E>, +) -> impl Parser<I, O, E> { + #[cfg(feature = "debug")] + { + internals::Trace::new(parser, name) + } + #[cfg(not(feature = "debug"))] + { + parser + } +} + +#[cfg_attr(not(feature = "debug"), allow(unused_variables))] +pub(crate) fn trace_result<T, E>( + name: impl crate::lib::std::fmt::Display, + res: &Result<T, ErrMode<E>>, +) { + #[cfg(feature = "debug")] + { + let depth = internals::Depth::existing(); + let severity = internals::Severity::with_result(res); + internals::result(*depth, &name, severity); + } +} + +#[test] +#[cfg(feature = "std")] +#[cfg_attr(miri, ignore)] +#[cfg(unix)] +#[cfg(feature = "debug")] +fn example() { + use term_transcript::{test::TestConfig, ShellOptions}; + + let path = snapbox::cmd::compile_example("string", ["--features=debug"]).unwrap(); + + let current_dir = path.parent().unwrap(); + let cmd = path.file_name().unwrap(); + // HACK: term_transcript doesn't allow non-UTF8 paths + let cmd = format!("./{}", cmd.to_string_lossy()); + + TestConfig::new( + ShellOptions::default() + .with_current_dir(current_dir) + .with_env("CLICOLOR_FORCE", "1"), + ) + .test("assets/trace.svg", [cmd.as_str()]); +} diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs index ec68e48..da5fa79 100644 --- a/src/combinator/mod.rs +++ b/src/combinator/mod.rs @@ -6,81 +6,83 @@ //! //! Those are used to recognize the lowest level elements of your grammar, like, "here is a dot", or "here is an big endian integer". //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`one_of`][crate::token::one_of] | `one_of(['a', 'b', 'c'])` | `"abc"` | `Ok(("bc", 'a'))` |Matches one of the provided characters (works with non ASCII characters too)| -//! 
| [`none_of`][crate::token::none_of] | `none_of(['a', 'b', 'c'])` | `"xyab"` | `Ok(("yab", 'x'))` |Matches anything but the provided characters| -//! | [`tag`][crate::token::tag] | `"hello"` | `"hello world"` | `Ok((" world", "hello"))` |Recognizes a specific suite of characters or bytes| -//! | [`tag_no_case`][crate::token::tag_no_case] | `tag_no_case("hello")` | `"HeLLo World"` | `Ok((" World", "HeLLo"))` |Case insensitive comparison. Note that case insensitive comparison is not well defined for unicode, and that you might have bad surprises| -//! | [`take`][crate::token::take] | `take(4)` | `"hello"` | `Ok(("o", "hell"))` |Takes a specific number of bytes or characters| -//! | [`take_while`][crate::token::take_while] | `take_while(0.., is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided pattern matches.| -//! | [`take_till0`][crate::token::take_till0] | `take_till0(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided pattern matches. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(0.., \|c\| !f(c))`| -//! | [`take_until0`][crate::token::take_until0] | `take_until0("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`one_of`][crate::token::one_of] | `one_of(['a', 'b', 'c'])` | `"abc"` | `"bc"` | `Ok('a')` |Matches one of the provided characters (works with non ASCII characters too)| +//! | [`none_of`][crate::token::none_of] | `none_of(['a', 'b', 'c'])` | `"xyab"` | `"yab"` | `Ok('x')` |Matches anything but the provided characters| +//! 
| [`tag`][crate::token::tag] | `"hello"` | `"hello world"` | `" world"` | `Ok("hello")` |Recognizes a specific suite of characters or bytes (see also [`Caseless`][crate::ascii::Caseless])| +//! | [`take`][crate::token::take] | `take(4)` | `"hello"` | `"o"` | `Ok("hell")` |Takes a specific number of bytes or characters| +//! | [`take_while`][crate::token::take_while] | `take_while(0.., is_alphabetic)` | `"abc123"` | `"123"` | `Ok("abc")` |Returns the longest list of bytes for which the provided pattern matches.| +//! | [`take_till0`][crate::token::take_till0] | `take_till0(is_alphabetic)` | `"123abc"` | `"abc"` | `Ok("123")` |Returns the longest list of bytes or characters until the provided pattern matches. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(0.., \|c\| !f(c))`| +//! | [`take_until`][crate::token::take_until] | `take_until(0.., "world")` | `"Hello world"` | `"world"` | `Ok("Hello ")` |Returns the longest list of bytes or characters until the provided tag is found.| //! //! ## Choice combinators //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`alt`][crate::combinator::alt] | `alt(("ab", "cd"))` | `"cdef"` | `Ok(("ef", "cd"))` |Try a list of parsers and return the result of the first successful one| -//! | [`dispatch`][crate::combinator::dispatch] | \- | \- | \- | `match` for parsers | -//! | [`permutation`][crate::combinator::permutation] | `permutation(("ab", "cd", "12"))` | `"cd12abc"` | `Ok(("c", ("ab", "cd", "12"))` |Succeeds when all its child parser have succeeded, whatever the order| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`alt`] | `alt(("ab", "cd"))` | `"cdef"` | `"ef"` | `Ok("cd")` |Try a list of parsers and return the result of the first successful one| +//! | [`dispatch`] | \- | \- | \- | \- | `match` for parsers | +//! 
| [`permutation`] | `permutation(("ab", "cd", "12"))` | `"cd12abc"` | `"c"` | `Ok(("ab", "cd", "12"))` |Succeeds when all of its child parsers have succeeded, whatever the order| //! //! ## Sequence combinators //! //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`(...)` (tuples)][crate::Parser] | `("ab", "XY", take(1))` | `"abXYZ!"` | `Ok(("!", ("ab", "XY", "Z")))` |Chains parsers and assemble the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple| -//! | [`delimited`] | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || -//! | [`preceded`] | `preceded("ab", "XY")` | `"abXYZ"` | `Ok(("Z", "XY"))` || -//! | [`terminated`] | `terminated("ab", "XY")` | `"abXYZ"` | `Ok(("Z", "ab"))` || -//! | [`separated_pair`] | `separated_pair("hello", char(','), "world")` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`(...)` (tuples)][crate::Parser] | `("ab", "XY", take(1))` | `"abXYZ!"` | `"!"` | `Ok(("ab", "XY", "Z"))` |Chains parsers and assembles the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple| +//! | [`seq!`] | `seq!(_: char('('), take(2), _: char(')'))` | `"(ab)cd"` | `"cd"` | `Ok("ab")` || +//! | [`delimited`] | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `"cd"` | `Ok("ab")` || +//! | [`preceded`] | `preceded("ab", "XY")` | `"abXYZ"` | `"Z"` | `Ok("XY")` || +//! | [`terminated`] | `terminated("ab", "XY")` | `"abXYZ"` | `"Z"` | `Ok("ab")` || +//! | [`separated_pair`] | `separated_pair("hello", char(','), "world")` | `"hello,world!"` | `"!"` | `Ok(("hello", "world"))` || //! //! ## Applying a parser multiple times //! //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! 
| [`repeat`][crate::combinator::repeat] | `repeat(1..=3, "ab")` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -//! | [`repeat_till0`][crate::combinator::repeat_till0] | `repeat_till0(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| -//! | [`separated0`][crate::combinator::separated0] | `separated0("ab", ",")` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated1` works like `separated0` but must returns at least one element| -//! | [`fold_repeat`][crate::combinator::fold_repeat] | `fold_repeat(1..=2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`repeat`] | `repeat(1..=3, "ab")` | `"ababc"` | `"c"` | `Ok(vec!["ab", "ab"])` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| +//! | [`repeat_till`] | `repeat_till(0.., tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `"g"` | `Ok((vec!["ab", "ab"], "ef"))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +//! | [`separated`] | `separated(1..=3, "ab", ",")` | `"ab,ab,ab."` | `"."` | `Ok(vec!["ab", "ab", "ab"])` |Applies the parser and separator between m and n times (n included) and returns the list of results in a Vec| +//! | [`fold_repeat`] | `fold_repeat(1..=2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `[3]` | `Ok(3)` |Applies the parser between m and n times (n included) and folds the list of return values| //! //! ## Partial related //! //! -//! 
- [`eof`][eof]: Returns its input if it is at the end of input data +//! - [`eof`]: Returns its input if it is at the end of input data //! - [`Parser::complete_err`]: Replaces an `Incomplete` returned by the child parser with an `Backtrack` //! //! ## Modifiers //! -//! - [`cond`][cond]: Conditional combinator. Wraps another parser and calls it if the condition is met -//! - [`Parser::flat_map`][crate::Parser::flat_map]: method to map a new parser from the output of the first parser, then apply that parser over the rest of the input -//! - [`Parser::value`][crate::Parser::value]: method to replace the result of a parser -//! - [`Parser::map`][crate::Parser::map]: method to map a function on the result of a parser -//! - [`Parser::and_then`][crate::Parser::and_then]: Applies a second parser over the output of the first one -//! - [`Parser::verify_map`][Parser::verify_map]: Maps a function returning an `Option` on the output of a parser -//! - [`Parser::try_map`][Parser::try_map]: Maps a function returning a `Result` on the output of a parser -//! - [`Parser::parse_to`][crate::Parser::parse_to]: Apply [`std::str::FromStr`] to the output of the parser -//! - [`not`][not]: Returns a result only if the embedded parser returns `Backtrack` or `Incomplete`. Does not consume the input -//! - [`opt`][opt]: Make the underlying parser optional -//! - [`peek`][peek]: Returns a result without consuming the input -//! - [`Parser::recognize`][Parser::recognize]: If the child parser was successful, return the consumed input as the produced value -//! - [`Parser::with_recognized`][Parser::with_recognized]: If the child parser was successful, return a tuple of the consumed input and the produced output. -//! - [`Parser::span`][Parser::span]: If the child parser was successful, return the location of the consumed input as the produced value -//! 
- [`Parser::with_span`][Parser::with_span]: If the child parser was successful, return a tuple of the location of the consumed input and the produced output. +//! - [`cond`]: Conditional combinator. Wraps another parser and calls it if the condition is met +//! - [`Parser::flat_map`]: method to map a new parser from the output of the first parser, then apply that parser over the rest of the input +//! - [`Parser::value`]: method to replace the result of a parser +//! - [`Parser::default_value`]: method to replace the result of a parser +//! - [`Parser::void`]: method to discard the result of a parser +//! - [`Parser::map`]: method to map a function on the result of a parser +//! - [`Parser::and_then`]: Applies a second parser over the output of the first one +//! - [`Parser::verify_map`]: Maps a function returning an `Option` on the output of a parser +//! - [`Parser::try_map`]: Maps a function returning a `Result` on the output of a parser +//! - [`Parser::parse_to`]: Apply [`std::str::FromStr`] to the output of the parser +//! - [`not`]: Returns a result only if the embedded parser returns `Backtrack` or `Incomplete`. Does not consume the input +//! - [`opt`]: Make the underlying parser optional +//! - [`peek`]: Returns a result without consuming the input +//! - [`Parser::recognize`]: If the child parser was successful, return the consumed input as the produced value +//! - [`Parser::with_recognized`]: If the child parser was successful, return a tuple of the consumed input and the produced output. +//! - [`Parser::span`]: If the child parser was successful, return the location of the consumed input as the produced value +//! - [`Parser::with_span`]: If the child parser was successful, return a tuple of the location of the consumed input and the produced output. //! - [`Parser::verify`]: Returns the result of the child parser if it satisfies a verification function //! //! ## Error management and debugging //! //! 
- [`cut_err`]: Commit the parse result, disallowing alternative parsers from being attempted -//! - [`backtrack_err`]: Attemmpts a parse, allowing alternative parsers to be attempted despite +//! - [`backtrack_err`]: Attempts a parse, allowing alternative parsers to be attempted despite //! use of `cut_err` //! - [`Parser::context`]: Add context to the error if the parser fails -//! - [`trace`][crate::trace::trace]: Print the parse state with the `debug` feature flag +//! - [`trace`]: Print the parse state with the `debug` feature flag //! - [`todo()`]: Placeholder parser //! //! ## Remaining combinators //! -//! - [`success`][success]: Returns a value without consuming any input, always succeeds -//! - [`fail`][fail]: Inversion of `success`. Always fails. +//! - [`success`]: Returns a value without consuming any input, always succeeds +//! - [`fail`]: Inversion of `success`. Always fails. //! - [`Parser::by_ref`]: Allow moving `&mut impl Parser` into other parsers //! //! ## Text parsing @@ -90,8 +92,8 @@ //! - [`crlf`][crate::ascii::crlf]: Recognizes the string `\r\n` //! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) //! - [`newline`][crate::ascii::newline]: Matches a newline character `\n` -//! - [`not_line_ending`][crate::ascii::not_line_ending]: Recognizes a string of any char except `\r` or `\n` -//! - [`rest`][rest]: Return the remaining input +//! - [`till_line_ending`][crate::ascii::till_line_ending]: Recognizes a string of any char except `\r` or `\n` +//! - [`rest`]: Return the remaining input //! //! - [`alpha0`][crate::ascii::alpha0]: Recognizes zero or more lowercase and uppercase alphabetic characters: `[a-zA-Z]`. [`alpha1`][crate::ascii::alpha1] does the same but returns at least one character //! - [`alphanumeric0`][crate::ascii::alphanumeric0]: Recognizes zero or more numerical and alphabetic characters: `[0-9a-zA-Z]`. 
[`alphanumeric1`][crate::ascii::alphanumeric1] does the same but returns at least one character @@ -102,7 +104,7 @@ //! - [`oct_digit0`][crate::ascii::oct_digit0]: Recognizes zero or more octal characters: `[0-7]`. [`oct_digit1`][crate::ascii::oct_digit1] does the same but returns at least one character //! //! - [`float`][crate::ascii::float]: Parse a floating point number in a byte string -//! - [`dec_int`][crate::ascii::dec_uint]: Decode a variable-width, decimal signed integer +//! - [`dec_int`][crate::ascii::dec_int]: Decode a variable-width, decimal signed integer //! - [`dec_uint`][crate::ascii::dec_uint]: Decode a variable-width, decimal unsigned integer //! - [`hex_uint`][crate::ascii::hex_uint]: Decode a variable-width, hexadecimal integer //! @@ -149,12 +151,13 @@ //! //! - [`bits`][crate::binary::bits::bits]: Transforms the current input type (byte slice `&[u8]`) to a bit stream on which bit specific parsers and more general combinators can be applied //! - [`bytes`][crate::binary::bits::bytes]: Transforms its bits stream input back into a byte slice for the underlying parser -//! - [`take`][crate::binary::bits::take]: Take a set number of its -//! - [`tag`][crate::binary::bits::tag]: Check if a set number of bis matches a pattern +//! - [`take`][crate::binary::bits::take]: Take a set number of bits +//! - [`tag`][crate::binary::bits::tag]: Check if a set number of bits matches a pattern //! - [`bool`][crate::binary::bits::bool]: Match any one bit mod branch; mod core; +mod debug; mod multi; mod parser; mod sequence; @@ -164,6 +167,7 @@ mod tests; pub use self::branch::*; pub use self::core::*; +pub use self::debug::*; pub use self::multi::*; pub use self::parser::*; pub use self::sequence::*; diff --git a/src/combinator/multi.rs b/src/combinator/multi.rs index 1fdb753..f76d635 100644 --- a/src/combinator/multi.rs +++ b/src/combinator/multi.rs @@ -1,18 +1,18 @@ //! 
Combinators applying their child parser multiple times +use crate::combinator::trace; use crate::error::ErrMode; use crate::error::ErrorKind; use crate::error::ParserError; use crate::stream::Accumulate; use crate::stream::Range; use crate::stream::Stream; -use crate::trace::trace; use crate::PResult; use crate::Parser; /// [`Accumulate`] the output of a parser into a container, like `Vec` /// -/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. /// /// # Arguments @@ -28,7 +28,7 @@ use crate::Parser; /// /// # Example /// -/// Zero or more reptitions: +/// Zero or more repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; @@ -47,7 +47,7 @@ use crate::Parser; /// # } /// ``` /// -/// One or more reptitions: +/// One or more repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; @@ -66,7 +66,7 @@ use crate::Parser; /// # } /// ``` /// -/// Fixed number of repeitions: +/// Fixed number of repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; @@ -86,7 +86,7 @@ use crate::Parser; /// # } /// ``` /// -/// Arbitrary reptitions: +/// Arbitrary repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; @@ -115,25 +115,194 @@ use crate::Parser; #[doc(alias = "skip_many")] #[doc(alias = "skip_many1")] #[inline(always)] -pub fn repeat<I, O, C, E, F>(range: impl Into<Range>, mut f: F) -> impl Parser<I, C, E> +pub fn repeat<I, O, C, E, P>(range: impl Into<Range>, parser: P) -> Repeat<P, I, O, C, E> where I: Stream, C: Accumulate<O>, - F: Parser<I, O, E>, + P: 
Parser<I, O, E>, E: ParserError<I>, { - let Range { - start_inclusive, - end_inclusive, - } = range.into(); - trace("repeat", move |i: &mut I| { - match (start_inclusive, end_inclusive) { - (0, None) => repeat0_(&mut f, i), - (1, None) => repeat1_(&mut f, i), - (start, end) if Some(start) == end => repeat_n_(start, &mut f, i), - (start, end) => repeat_m_n_(start, end.unwrap_or(usize::MAX), &mut f, i), - } - }) + Repeat { + range: range.into(), + parser, + i: Default::default(), + o: Default::default(), + c: Default::default(), + e: Default::default(), + } +} + +/// Implementation of [`repeat`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Repeat<P, I, O, C, E> +where + P: Parser<I, O, E>, + I: Stream, + C: Accumulate<O>, + E: ParserError<I>, +{ + range: Range, + parser: P, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + c: core::marker::PhantomData<C>, + e: core::marker::PhantomData<E>, +} + +impl<P, I, O, E> Repeat<P, I, O, (), E> +where + P: Parser<I, O, E>, + I: Stream, + E: ParserError<I>, +{ + /// Repeats the embedded parser, calling `g` to gather the results + /// + /// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see + /// [`cut_err`][crate::combinator::cut_err]. + /// + /// # Arguments + /// * `init` A function returning the initial value. + /// * `g` The function that combines a result of `f` with + /// the current accumulator. + /// + /// **Warning:** If the parser passed to `fold` accepts empty inputs + /// (like `alpha0` or `digit0`), `fold_repeat` will return an error, + /// to prevent going into an infinite loop. 
+ /// + /// # Example + /// + /// Zero or more repetitions: + /// ```rust + /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; + /// # use winnow::prelude::*; + /// use winnow::combinator::repeat; + /// use winnow::token::tag; + /// + /// fn parser(s: &str) -> IResult<&str, Vec<&str>> { + /// repeat( + /// 0.., + /// "abc" + /// ).fold( + /// Vec::new, + /// |mut acc: Vec<_>, item| { + /// acc.push(item); + /// acc + /// } + /// ).parse_peek(s) + /// } + /// + /// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + /// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + /// assert_eq!(parser("123123"), Ok(("123123", vec![]))); + /// assert_eq!(parser(""), Ok(("", vec![]))); + /// ``` + /// + /// One or more repetitions: + /// ```rust + /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; + /// # use winnow::prelude::*; + /// use winnow::combinator::repeat; + /// use winnow::token::tag; + /// + /// fn parser(s: &str) -> IResult<&str, Vec<&str>> { + /// repeat( + /// 1.., + /// "abc", + /// ).fold( + /// Vec::new, + /// |mut acc: Vec<_>, item| { + /// acc.push(item); + /// acc + /// } + /// ).parse_peek(s) + /// } + /// + /// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + /// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + /// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Many)))); + /// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Many)))); + /// ``` + /// + /// Arbitrary number of repetitions: + /// ```rust + /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; + /// # use winnow::prelude::*; + /// use winnow::combinator::repeat; + /// use winnow::token::tag; + /// + /// fn parser(s: &str) -> IResult<&str, Vec<&str>> { + /// repeat( + /// 0..=2, + /// "abc", + /// ).fold( + /// Vec::new, + /// |mut acc: Vec<_>, item| { + /// acc.push(item); + /// acc + /// } + /// ).parse_peek(s) + /// } 
+ /// + /// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + /// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + /// assert_eq!(parser("123123"), Ok(("123123", vec![]))); + /// assert_eq!(parser(""), Ok(("", vec![]))); + /// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); + /// ``` + #[doc(alias = "fold_many0")] + #[doc(alias = "fold_many1")] + #[doc(alias = "fold_many_m_n")] + #[doc(alias = "fold_repeat")] + #[inline(always)] + pub fn fold<H, G, R>(mut self, mut init: H, mut g: G) -> impl Parser<I, R, E> + where + G: FnMut(R, O) -> R, + H: FnMut() -> R, + { + let Range { + start_inclusive, + end_inclusive, + } = self.range; + trace("repeat_fold", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => fold_repeat0_(&mut self.parser, &mut init, &mut g, i), + (1, None) => fold_repeat1_(&mut self.parser, &mut init, &mut g, i), + (start, end) => fold_repeat_m_n_( + start, + end.unwrap_or(usize::MAX), + &mut self.parser, + &mut init, + &mut g, + i, + ), + } + }) + } +} + +impl<P, I, O, C, E> Parser<I, C, E> for Repeat<P, I, O, C, E> +where + P: Parser<I, O, E>, + I: Stream, + C: Accumulate<O>, + E: ParserError<I>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<C, E> { + let Range { + start_inclusive, + end_inclusive, + } = self.range; + trace("repeat", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => repeat0_(&mut self.parser, i), + (1, None) => repeat1_(&mut self.parser, i), + (start, end) if Some(start) == end => repeat_n_(start, &mut self.parser, i), + (start, end) => repeat_m_n_(start, end.unwrap_or(usize::MAX), &mut self.parser, i), + } + }) + .parse_next(i) + } } fn repeat0_<I, O, C, E, F>(f: &mut F, i: &mut I) -> PResult<C, E> @@ -201,6 +370,73 @@ where } } +fn repeat_n_<I, O, C, E, F>(count: usize, f: &mut F, i: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + let mut res = 
C::initial(Some(count)); + + for _ in 0..count { + match f.parse_next(i) { + Ok(o) => { + res.accumulate(o); + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + } + } + + Ok(res) +} + +fn repeat_m_n_<I, O, C, E, F>(min: usize, max: usize, parse: &mut F, input: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); + } + + let mut res = C::initial(Some(min)); + for count in 0..max { + let start = input.checkpoint(); + let len = input.eof_offset(); + match parse.parse_next(input) { + Ok(value) => { + // infinite loop check: the parser must always consume + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`repeat` parsers must always consume", + )); + } + + res.accumulate(value); + } + Err(ErrMode::Backtrack(e)) => { + if count < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(res); + } + } + Err(e) => { + return Err(e); + } + } + } + + Ok(res) +} + /// [`Accumulate`] the output of parser `f` into a container, like `Vec`, until the parser `g` /// produces a result. 
/// @@ -216,11 +452,11 @@ where /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::combinator::repeat_till0; +/// use winnow::combinator::repeat_till; /// use winnow::token::tag; /// /// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> { -/// repeat_till0("abc", "end").parse_peek(s) +/// repeat_till(0.., "abc", "end").parse_peek(s) /// }; /// /// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end")))); @@ -231,7 +467,11 @@ where /// # } /// ``` #[doc(alias = "many_till0")] -pub fn repeat_till0<I, O, C, P, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, (C, P), E> +pub fn repeat_till<I, O, C, P, E, F, G>( + range: impl Into<Range>, + mut f: F, + mut g: G, +) -> impl Parser<I, (C, P), E> where I: Stream, C: Accumulate<O>, @@ -239,39 +479,262 @@ where G: Parser<I, P, E>, E: ParserError<I>, { - trace("repeat_till0", move |i: &mut I| { - let mut res = C::initial(None); - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match g.parse_next(i) { - Ok(o) => return Ok((res, o)), - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - match f.parse_next(i) { - Err(e) => return Err(e.append(i, ErrorKind::Many)), - Ok(o) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert( - i, - "`repeat` parsers must always consume", - )); - } - - res.accumulate(o); + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("repeat_till", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => repeat_till0_(&mut f, &mut g, i), + (start, end) => repeat_till_m_n_(start, end.unwrap_or(usize::MAX), &mut f, &mut g, i), + } + }) +} + +/// Deprecated, replaced with [`repeat_till`] +#[deprecated(since = "0.5.35", note = "Replaced with `repeat_till`")] +#[inline(always)] +pub fn repeat_till0<I, O, C, P, E, F, G>(f: F, g: G) -> impl Parser<I, (C, P), 
E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParserError<I>, +{ + repeat_till(0.., f, g) +} + +fn repeat_till0_<I, O, C, P, E, F, G>(f: &mut F, g: &mut G, i: &mut I) -> PResult<(C, P), E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParserError<I>, +{ + let mut res = C::initial(None); + loop { + let start = i.checkpoint(); + let len = i.eof_offset(); + match g.parse_next(i) { + Ok(o) => return Ok((res, o)), + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + match f.parse_next(i) { + Err(e) => return Err(e.append(i, ErrorKind::Many)), + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); } + + res.accumulate(o); } } - Err(e) => return Err(e), + } + Err(e) => return Err(e), + } + } +} + +fn repeat_till_m_n_<I, O, C, P, E, F, G>( + min: usize, + max: usize, + f: &mut F, + g: &mut G, + i: &mut I, +) -> PResult<(C, P), E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParserError<I>, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(i, ErrorKind::Many))); + } + + let mut res = C::initial(Some(min)); + for _ in 0..min { + match f.parse_next(i) { + Ok(o) => { + res.accumulate(o); + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); } } + } + for count in min..=max { + let start = i.checkpoint(); + let len = i.eof_offset(); + match g.parse_next(i) { + Ok(o) => return Ok((res, o)), + Err(ErrMode::Backtrack(err)) => { + if count == max { + return Err(ErrMode::Backtrack(err)); + } + i.reset(start); + match f.parse_next(i) { + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); + } + + res.accumulate(o); + } + } + } + Err(e) 
=> return Err(e), + } + } + unreachable!() +} + +/// [`Accumulate`] the output of a parser, interleaved with `sep` +/// +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// [`cut_err`][crate::combinator::cut_err]. +/// +/// # Arguments +/// * `range` The minimum and maximum number of iterations. +/// * `parser` The parser that parses the elements of the list. +/// * `sep` The parser that parses the separator between list elements. +/// +/// **Warning:** If the separator parser accepts empty inputs +/// (like `alpha0` or `digit0`), `separated` will return an error, +/// to prevent going into an infinite loop. +/// +/// # Example +/// +/// Zero or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(0.., "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// # } +/// ``` +/// +/// One or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(1.., "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// 
assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Fixed number of repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(2, "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("|abc", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Err(ErrMode::Backtrack(InputError::new("123abc", ErrorKind::Tag)))); +/// assert_eq!(parser("abc|def"), Err(ErrMode::Backtrack(InputError::new("def", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Arbitrary repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(0..=2, "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("|abc", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// # } +/// ``` +#[doc(alias = "sep_by")] +#[doc(alias = "sep_by1")] +#[doc(alias = "separated_list0")] +#[doc(alias = "separated_list1")] +#[doc(alias = "separated_m_n")] +#[inline(always)] +pub fn separated<I, O, 
C, O2, E, P, S>( + range: impl Into<Range>, + mut parser: P, + mut separator: S, +) -> impl Parser<I, C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("separated", move |input: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => separated0_(&mut parser, &mut separator, input), + (1, None) => separated1_(&mut parser, &mut separator, input), + (start, end) if Some(start) == end => { + separated_n_(start, &mut parser, &mut separator, input) + } + (start, end) => separated_m_n_( + start, + end.unwrap_or(usize::MAX), + &mut parser, + &mut separator, + input, + ), + } }) } -/// [`Accumulate`] the output of a parser, interleaed with `sep` +/// [`Accumulate`] the output of a parser, interleaved with `sep` /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. 
/// /// # Arguments @@ -300,6 +763,7 @@ where /// ``` #[doc(alias = "sep_by")] #[doc(alias = "separated_list0")] +#[deprecated(since = "0.5.19", note = "Replaced with `combinator::separated`")] pub fn separated0<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> where I: Stream, @@ -309,56 +773,74 @@ where E: ParserError<I>, { trace("separated0", move |i: &mut I| { - let mut res = C::initial(None); + separated0_(&mut parser, &mut sep, i) + }) +} - let start = i.checkpoint(); - match parser.parse_next(i) { +fn separated0_<I, O, C, O2, E, P, S>( + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(None); + + let start = input.checkpoint(); + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + + loop { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); + input.reset(start); + return Ok(acc); } Err(e) => return Err(e), - Ok(o) => { - res.accumulate(o); - } - } - - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match sep.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); } - Err(e) => return Err(e), - Ok(_) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert(i, "sep parsers must always consume")); - } - match parser.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); - } - Err(e) => return Err(e), - Ok(o) => { - 
res.accumulate(o); - } + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); } } } } - }) + } } -/// [`Accumulate`] the output of a parser, interleaed with `sep` +/// [`Accumulate`] the output of a parser, interleaved with `sep` /// /// Fails if the element parser does not produce at least one element.$ /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. /// /// # Arguments @@ -387,6 +869,7 @@ where /// ``` #[doc(alias = "sep_by1")] #[doc(alias = "separated_list1")] +#[deprecated(since = "0.5.19", note = "Replaced with `combinator::separated`")] pub fn separated1<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> where I: Stream, @@ -396,50 +879,209 @@ where E: ParserError<I>, { trace("separated1", move |i: &mut I| { - let mut res = C::initial(None); + separated1_(&mut parser, &mut sep, i) + }) +} + +fn separated1_<I, O, C, O2, E, P, S>( + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(None); + + // Parse the first element + match parser.parse_next(input) { + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } - // Parse the first element - match parser.parse_next(i) { + loop { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } Err(e) => return Err(e), - Ok(o) => { - res.accumulate(o); + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser 
must always consume", + )); + } + + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } } } + } +} - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match sep.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); +fn separated_n_<I, O, C, O2, E, P, S>( + count: usize, + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(Some(count)); + + if count == 0 { + return Ok(acc); + } + + match parser.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(o) => { + acc.accumulate(o); + } + } + + for _ in 1..count { + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); } - Err(e) => return Err(e), - Ok(_) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert(i, "sep parsers must always consume")); + + match parser.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); } + Ok(o) => { + acc.accumulate(o); + } + } + } + } + } - match parser.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); - } - Err(e) => return Err(e), - Ok(o) => { - res.accumulate(o); + Ok(acc) +} + +fn separated_m_n_<I, O, C, O2, E, P, S>( + min: usize, + max: usize, + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + if min > max { + 
return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); + } + + let mut acc = C::initial(Some(min)); + + let start = input.checkpoint(); + match parser.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if min == 0 { + input.reset(start); + return Ok(acc); + } else { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + + for index in 1..max { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if index < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(acc); + } + } + Err(e) => { + return Err(e); + } + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); + } + + match parser.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if index < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(acc); } } + Err(e) => { + return Err(e); + } + Ok(o) => { + acc.accumulate(o); + } } } } - }) + } + + Ok(acc) } /// Alternates between two parsers, merging the results (left associative) /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. 
/// /// # Example @@ -461,14 +1103,14 @@ where pub fn separated_foldl1<I, O, O2, E, P, S, Op>( mut parser: P, mut sep: S, - op: Op, + mut op: Op, ) -> impl Parser<I, O, E> where I: Stream, P: Parser<I, O, E>, S: Parser<I, O2, E>, E: ParserError<I>, - Op: Fn(O, O2, O) -> O, + Op: FnMut(O, O2, O) -> O, { trace("separated_foldl1", move |i: &mut I| { let mut ol = parser.parse_next(i)?; @@ -506,7 +1148,7 @@ where /// Alternates between two parsers, merging the results (right associative) /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. /// /// # Example @@ -530,14 +1172,14 @@ where pub fn separated_foldr1<I, O, O2, E, P, S, Op>( mut parser: P, mut sep: S, - op: Op, + mut op: Op, ) -> impl Parser<I, O, E> where I: Stream, P: Parser<I, O, E>, S: Parser<I, O2, E>, E: ParserError<I>, - Op: Fn(O, O2, O) -> O, + Op: FnMut(O, O2, O) -> O, { trace("separated_foldr1", move |i: &mut I| { let ol = parser.parse_next(i)?; @@ -556,73 +1198,6 @@ where }) } -fn repeat_m_n_<I, O, C, E, F>(min: usize, max: usize, parse: &mut F, input: &mut I) -> PResult<C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParserError<I>, -{ - if min > max { - return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); - } - - let mut res = C::initial(Some(min)); - for count in 0..max { - let start = input.checkpoint(); - let len = input.eof_offset(); - match parse.parse_next(input) { - Ok(value) => { - // infinite loop check: the parser must always consume - if input.eof_offset() == len { - return Err(ErrMode::assert( - input, - "`repeat` parsers must always consume", - )); - } - - res.accumulate(value); - } - Err(ErrMode::Backtrack(e)) => { - if count < min { - return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); - } else { - input.reset(start); - return Ok(res); 
- } - } - Err(e) => { - return Err(e); - } - } - } - - Ok(res) -} - -fn repeat_n_<I, O, C, E, F>(count: usize, f: &mut F, i: &mut I) -> PResult<C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParserError<I>, -{ - let mut res = C::initial(Some(count)); - - for _ in 0..count { - match f.parse_next(i) { - Ok(o) => { - res.accumulate(o); - } - Err(e) => { - return Err(e.append(i, ErrorKind::Many)); - } - } - } - - Ok(res) -} - /// Repeats the embedded parser, filling the given slice with results. /// /// This parser fails if the input runs out before the given slice is full. @@ -673,109 +1248,14 @@ where }) } -/// Repeats the embedded parser `m..=n` times, calling `g` to gather the results -/// -/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `m` The minimum number of iterations. -/// * `n` The maximum number of iterations. -/// * `f` The parser to apply. -/// * `init` A function returning the initial value. -/// * `g` The function that combines a result of `f` with -/// the current accumulator. -/// -/// **Warning:** If the parser passed to `fold_repeat` accepts empty inputs -/// (like `alpha0` or `digit0`), `fold_repeat` will return an error, -/// to prevent going into an infinite loop. 
-/// -/// # Example -/// -/// Zero or more repetitions: -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::combinator::fold_repeat; -/// use winnow::token::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_repeat( -/// 0.., -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_peek(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// ``` -/// -/// One or more repetitions: -/// ```rust -/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::combinator::fold_repeat; -/// use winnow::token::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_repeat( -/// 1.., -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_peek(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Many)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Many)))); -/// ``` -/// -/// Arbitrary number of repetitions: -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::combinator::fold_repeat; -/// use winnow::token::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_repeat( -/// 0..=2, -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_peek(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// 
assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); -/// ``` -#[doc(alias = "fold_many0")] -#[doc(alias = "fold_many1")] -#[doc(alias = "fold_many_m_n")] +/// Deprecated, replaced with [`Repeat::fold`] +#[deprecated(since = "0.5.36", note = "Replaced with `repeat(...).fold(...)`")] #[inline(always)] pub fn fold_repeat<I, O, E, F, G, H, R>( range: impl Into<Range>, - mut f: F, - mut init: H, - mut g: G, + f: F, + init: H, + g: G, ) -> impl Parser<I, R, E> where I: Stream, @@ -784,24 +1264,7 @@ where H: FnMut() -> R, E: ParserError<I>, { - let Range { - start_inclusive, - end_inclusive, - } = range.into(); - trace("fold_repeat", move |i: &mut I| { - match (start_inclusive, end_inclusive) { - (0, None) => fold_repeat0_(&mut f, &mut init, &mut g, i), - (1, None) => fold_repeat1_(&mut f, &mut init, &mut g, i), - (start, end) => fold_repeat_m_n_( - start, - end.unwrap_or(usize::MAX), - &mut f, - &mut init, - &mut g, - i, - ), - } - }) + repeat(range, f).fold(init, g) } fn fold_repeat0_<I, O, E, F, G, H, R>( diff --git a/src/combinator/parser.rs b/src/combinator/parser.rs index fb11adc..9ffdb3c 100644 --- a/src/combinator/parser.rs +++ b/src/combinator/parser.rs @@ -1,13 +1,17 @@ +use crate::combinator::trace; +use crate::combinator::trace_result; +#[cfg(feature = "unstable-recover")] +use crate::error::FromRecoverableError; use crate::error::{AddContext, ErrMode, ErrorKind, FromExternalError, ParserError}; use crate::lib::std::borrow::Borrow; use crate::lib::std::ops::Range; +#[cfg(feature = "unstable-recover")] +use crate::stream::Recover; use crate::stream::StreamIsPartial; use crate::stream::{Location, Stream}; -use crate::trace::trace; -use crate::trace::trace_result; use crate::*; -/// Implementation of [`Parser::by_ref`][Parser::by_ref] +/// Implementation of [`Parser::by_ref`] 
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct ByRef<'p, P> { p: &'p mut P, @@ -35,7 +39,7 @@ where pub struct Map<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(O) -> O2, + G: FnMut(O) -> O2, { parser: F, map: G, @@ -48,7 +52,7 @@ where impl<F, G, I, O, O2, E> Map<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(O) -> O2, + G: FnMut(O) -> O2, { #[inline(always)] pub(crate) fn new(parser: F, map: G) -> Self { @@ -66,7 +70,7 @@ where impl<F, G, I, O, O2, E> Parser<I, O2, E> for Map<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(O) -> O2, + G: FnMut(O) -> O2, { #[inline] fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { @@ -393,7 +397,7 @@ where pub struct Verify<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: Borrow<O2>, O2: ?Sized, @@ -410,7 +414,7 @@ where impl<F, G, I, O, O2, E> Verify<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: Borrow<O2>, O2: ?Sized, @@ -432,7 +436,7 @@ where impl<F, G, I, O, O2, E> Parser<I, O, E> for Verify<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: Borrow<O2>, O2: ?Sized, @@ -493,6 +497,48 @@ where } } +/// Implementation of [`Parser::default_value`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + parser: F, + o2: core::marker::PhantomData<O2>, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, O2, E> DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + o2: Default::default(), + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, O2, E> Parser<I, O2, E> for 
DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O2, E> { + (self.parser).parse_next(input).map(|_| O2::default()) + } +} + /// Implementation of [`Parser::void`] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct Void<F, I, O, E> @@ -861,3 +907,191 @@ where .parse_next(i) } } + +/// Implementation of [`Parser::retry_after`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +#[cfg(feature = "unstable-recover")] +pub struct RetryAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + parser: P, + recover: R, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> RetryAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + pub(crate) fn new(parser: P, recover: R) -> Self { + Self { + parser, + recover, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> Parser<I, O, E> for RetryAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + if I::is_recovery_supported() { + retry_after_inner(&mut self.parser, &mut self.recover, i) + } else { + self.parser.parse_next(i) + } + } +} + +#[cfg(feature = "unstable-recover")] +fn retry_after_inner<P, R, I, O, E>(parser: &mut P, recover: &mut R, i: &mut I) -> PResult<O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + loop { + let token_start = i.checkpoint(); + let 
mut err = match parser.parse_next(i) { + Ok(o) => { + return Ok(o); + } + Err(ErrMode::Incomplete(e)) => return Err(ErrMode::Incomplete(e)), + Err(err) => err, + }; + let err_start = i.checkpoint(); + let err_start_eof_offset = i.eof_offset(); + if recover.parse_next(i).is_ok() { + let i_eof_offset = i.eof_offset(); + if err_start_eof_offset == i_eof_offset { + // Didn't advance so bubble the error up + } else if let Err(err_) = i.record_err(&token_start, &err_start, err) { + err = err_; + } else { + continue; + } + } + + i.reset(err_start.clone()); + err = err.map(|err| E::from_recoverable_error(&token_start, &err_start, i, err)); + return Err(err); + } +} + +/// Implementation of [`Parser::resume_after`] +#[cfg(feature = "unstable-recover")] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct ResumeAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + parser: P, + recover: R, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> ResumeAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + pub(crate) fn new(parser: P, recover: R) -> Self { + Self { + parser, + recover, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> Parser<I, Option<O>, E> for ResumeAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<Option<O>, E> { + if I::is_recovery_supported() { + resume_after_inner(&mut self.parser, &mut self.recover, i) + } else { + self.parser.parse_next(i).map(Some) + } + } +} + +#[cfg(feature = 
"unstable-recover")] +fn resume_after_inner<P, R, I, O, E>( + parser: &mut P, + recover: &mut R, + i: &mut I, +) -> PResult<Option<O>, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + let token_start = i.checkpoint(); + let mut err = match parser.parse_next(i) { + Ok(o) => { + return Ok(Some(o)); + } + Err(ErrMode::Incomplete(e)) => return Err(ErrMode::Incomplete(e)), + Err(err) => err, + }; + let err_start = i.checkpoint(); + if recover.parse_next(i).is_ok() { + if let Err(err_) = i.record_err(&token_start, &err_start, err) { + err = err_; + } else { + return Ok(None); + } + } + + i.reset(err_start.clone()); + err = err.map(|err| E::from_recoverable_error(&token_start, &err_start, i, err)); + Err(err) +} diff --git a/src/combinator/sequence.rs b/src/combinator/sequence.rs index 5cfeb9c..0f2e633 100644 --- a/src/combinator/sequence.rs +++ b/src/combinator/sequence.rs @@ -1,14 +1,19 @@ +use crate::combinator::trace; use crate::error::ParserError; use crate::stream::Stream; -use crate::trace::trace; use crate::*; +#[doc(inline)] +pub use crate::seq; + /// Sequence two parsers, only returning the output from the second. /// /// # Arguments /// * `first` The opening parser. /// * `second` The second parser to get object. /// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust @@ -47,6 +52,8 @@ where /// * `first` The first parser to apply. /// * `second` The second parser to match an object. /// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust @@ -86,6 +93,8 @@ where /// * `sep` The separator parser to apply. /// * `second` The second parser to apply. /// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust @@ -127,6 +136,8 @@ where /// * `second` The second parser to apply. /// * `third` The third parser to apply and discard. 
/// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust diff --git a/src/combinator/tests.rs b/src/combinator/tests.rs index 9d2b49d..726b410 100644 --- a/src/combinator/tests.rs +++ b/src/combinator/tests.rs @@ -717,13 +717,13 @@ fn permutation_test() { #[cfg(feature = "alloc")] fn separated0_test() { fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abcd", ",").parse_peek(i) + separated(0.., "abcd", ",").parse_peek(i) } fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("", ",").parse_peek(i) + separated(0.., "", ",").parse_peek(i) } fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abcd", "..").parse_peek(i) + separated(0.., "abcd", "..").parse_peek(i) } let a = &b"abcdef"[..]; @@ -773,7 +773,7 @@ fn separated0_test() { #[cfg_attr(debug_assertions, should_panic)] fn separated0_empty_sep_test() { fn empty_sep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abc", "").parse_peek(i) + separated(0.., "abc", "").parse_peek(i) } let i = &b"abcabc"[..]; @@ -792,10 +792,10 @@ fn separated0_empty_sep_test() { #[cfg(feature = "alloc")] fn separated1_test() { fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated1("abcd", ",").parse_peek(i) + separated(1.., "abcd", ",").parse_peek(i) } fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated1("abcd", "..").parse_peek(i) + separated(1.., "abcd", "..").parse_peek(i) } let a = &b"abcdef"[..]; @@ -840,6 +840,47 @@ fn separated1_test() { #[test] #[cfg(feature = "alloc")] +fn separated_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(2..=4, "abcd", ",").parse_peek(i) + } + + let a = &b"abcd,ef"[..]; + let b = &b"abcd,abcd,efgh"[..]; + let c = &b"abcd,abcd,abcd,abcd,efgh"[..]; + let d = &b"abcd,abcd,abcd,abcd,abcd,efgh"[..]; + let e = 
&b"abcd,ab"[..]; + + assert_eq!( + multi(Partial::new(a)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"ef"[..]), + ErrorKind::Tag + ))) + ); + let res1 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b",efgh"[..]), res1)) + ); + let res2 = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(c)), + Ok((Partial::new(&b",efgh"[..]), res2)) + ); + let res3 = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(d)), + Ok((Partial::new(&b",abcd,efgh"[..]), res3)) + ); + assert_eq!( + multi(Partial::new(e)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +#[cfg(feature = "alloc")] fn repeat0_test() { fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { repeat(0.., "abcd").parse_peek(i) @@ -925,7 +966,7 @@ fn repeat1_test() { fn repeat_till_test() { #[allow(clippy::type_complexity)] fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { - repeat_till0("abcd", "efgh").parse_peek(i) + repeat_till(0.., "abcd", "efgh").parse_peek(i) } let a = b"abcdabcdefghabcd"; @@ -947,6 +988,46 @@ fn repeat_till_test() { } #[test] +#[cfg(feature = "alloc")] +fn repeat_till_range_test() { + #[allow(clippy::type_complexity)] + fn multi(i: &str) -> IResult<&str, (Vec<&str>, &str)> { + repeat_till(2..=4, "ab", "cd").parse_peek(i) + } + + assert_eq!( + multi("cd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"cd", ErrorKind::Tag) + ))) + ); + assert_eq!( + multi("abcd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"cd", ErrorKind::Tag) + ))) + ); + assert_eq!(multi("ababcd"), Ok(("", (vec!["ab", "ab"], "cd")))); + assert_eq!(multi("abababcd"), Ok(("", (vec!["ab", "ab", "ab"], "cd")))); + assert_eq!( + multi("ababababcd"), + Ok(("", (vec!["ab", "ab", "ab", "ab"], "cd"))) + ); + assert_eq!( + 
multi("abababababcd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"abcd", ErrorKind::Tag) + ))) + ); +} + +#[test] #[cfg(feature = "std")] fn infinite_many() { fn tst(input: &[u8]) -> IResult<&[u8], &[u8]> { @@ -1120,7 +1201,9 @@ fn fold_repeat0_test() { acc } fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(0.., "abcd", Vec::new, fold_into_vec).parse_peek(i) + repeat(0.., "abcd") + .fold(Vec::new, fold_into_vec) + .parse_peek(i) } assert_eq!( @@ -1158,7 +1241,7 @@ fn fold_repeat0_empty_test() { acc } fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(0.., "", Vec::new, fold_into_vec).parse_peek(i) + repeat(0.., "").fold(Vec::new, fold_into_vec).parse_peek(i) } assert_eq!( @@ -1178,7 +1261,9 @@ fn fold_repeat1_test() { acc } fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(1.., "abcd", Vec::new, fold_into_vec).parse_peek(i) + repeat(1.., "abcd") + .fold(Vec::new, fold_into_vec) + .parse_peek(i) } let a = &b"abcdef"[..]; @@ -1214,7 +1299,9 @@ fn fold_repeat_test() { acc } fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(2..=4, "Abcd", Vec::new, fold_into_vec).parse_peek(i) + repeat(2..=4, "Abcd") + .fold(Vec::new, fold_into_vec) + .parse_peek(i) } let a = &b"Abcdef"[..]; |