1 files changed, 619 insertions, 0 deletions
diff --git a/src/position.rs b/src/position.rs
new file mode 100644
index 0000000..76e3383
--- /dev/null
+++ b/src/position.rs
@@ -0,0 +1,619 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use std::cmp::Ordering;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::ops::Range;
+use std::ptr;
+use std::str;
+
+use span;
+
+/// A cursor (byte offset) into a `&str` which provides useful methods to manually parse it.
+#[derive(Clone)]
+pub struct Position<'i> {
+    /// The string this cursor points into.
+    input: &'i str,
+    /// Byte offset of the cursor within `input`.
+    /// Safety: `input[pos..]` must be a valid codepoint boundary (must not panic when indexed).
+    pos: usize,
+}
+
+impl<'i> Position<'i> {
+    /// Creates a `Position` at byte offset `pos` of `input` without validating `pos` in release
+    /// builds; builds with `debug_assertions` assert that `input.get(pos..)` succeeds.
+    ///
+    /// # Safety:
+    /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
+    pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position {
+        debug_assert!(input.get(pos..).is_some());
+        Position { input, pos }
+    }
+
+    /// Builds a `Position` at byte offset `pos` of `input`, returning `None` when `pos` is past
+    /// the end of `input` or does not fall on a UTF-8 character boundary.
+    ///
+    /// # Examples
+    /// ```
+    /// # use pest::Position;
+    /// let cheart = '💖';
+    /// let heart = "💖";
+    /// assert_eq!(Position::new(heart, 1), None);
+    /// assert_ne!(Position::new(heart, cheart.len_utf8()), None);
+    /// ```
+    #[allow(clippy::new_ret_no_self)]
+    pub fn new(input: &str, pos: usize) -> Option<Position> {
+        input.get(pos..)?;
+        Some(Position { input, pos })
+    }
+
+    /// Creates a `Position` at the start (byte offset 0) of a `&str`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use pest::Position;
+    /// let start = Position::from_start("");
+    /// assert_eq!(start.pos(), 0);
+    /// ```
+    #[inline]
+    pub fn from_start(input: &'i str) -> Position<'i> {
+        // No check needed: offset 0 is a valid UTF-8 border of every string, even an empty one.
+        Position { input, pos: 0 }
+    }
+
+    /// Returns the byte offset of this `Position` within its input, as a `usize`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use pest::Position;
+    /// let input = "ab";
+    /// let start = Position::from_start(input);
+    ///
+    /// assert_eq!(start.pos(), 0);
+    /// ```
+    #[inline]
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+
+    /// Creates a `Span` from two `Position`s: this one and `other`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the two positions do not point into the very same input slice (`ptr::eq`).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use pest::Position;
+    /// let input = "ab";
+    /// let start = Position::from_start(input);
+    /// let span = start.span(&start.clone());
+    ///
+    /// assert_eq!(span.start(), 0);
+    /// assert_eq!(span.end(), 0);
+    /// ```
+    #[inline]
+    pub fn span(&self, other: &Position<'i>) -> span::Span<'i> {
+        if ptr::eq(self.input, other.input)
+        /* && self.input.get(self.pos..other.pos).is_some() */
+        {
+            // SAFETY: the pos field of a Position should always be a valid str index.
+            unsafe { span::Span::new_unchecked(self.input, self.pos, other.pos) }
+        } else {
+            // TODO: maybe also panic if self.pos > other.pos; that order is not checked today.
+            panic!("span created from positions from different inputs")
+        }
+    }
+
+    /// Returns the line and column number of this `Position`.
+    ///
+    /// Both numbers are 1-based, so the start of the input is `(1, 1)`. The text in front of this
+    /// `Position` is scanned `char` by `char`:
+    ///
+    /// * `"\n"` and `"\r\n"` each end a line: the line number goes up by one and the column
+    ///   restarts at 1;
+    /// * any other `char`, including a `'\r'` that is not followed by `'\n'`, advances the column
+    ///   by one, regardless of how many bytes it occupies.
+    ///
+    /// A `"\r\n"` pair only counts as one line break when both `char`s lie in front of this
+    /// `Position`; a `Position` placed between them sees the `'\r'` as an ordinary column.
+    ///
+    /// The scan starts from the beginning of the input on every call.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the position lies beyond the end of the input.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use pest;
+    /// # #[allow(non_camel_case_types)]
+    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+    /// enum Rule {}
+    ///
+    /// let input = "\na";
+    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
+    /// let mut result = state.match_string("\na");
+    /// assert!(result.is_ok());
+    /// assert_eq!(result.unwrap().position().line_col(), (2, 2));
+    /// ```
+    #[inline]
+    pub fn line_col(&self) -> (usize, usize) {
+        if self.pos > self.input.len() {
+            panic!("position out of bounds");
+        }
+
+        // Position's pos is always a UTF-8 border.
+        let mut chars = self.input[..self.pos].chars().peekable();
+        let (mut line, mut col) = (1, 1);
+
+        // The iterator covers exactly the text in front of the cursor; no byte counting needed.
+        while let Some(c) = chars.next() {
+            // "\r\n" is a single line break: consume the '\n' together with the '\r'.
+            let crlf = c == '\r' && chars.peek() == Some(&'\n');
+            if crlf {
+                chars.next();
+            }
+            if crlf || c == '\n' {
+                line += 1;
+                col = 1;
+            } else {
+                // Everything else, a lone '\r' included, is one column wide.
+                col += 1;
+            }
+        }
+
+        (line, col)
+    }
+
+    /// Returns the whole line of the input containing this `Position`, trailing `'\n'` included.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use pest;
+    /// # #[allow(non_camel_case_types)]
+    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+    /// enum Rule {}
+    ///
+    /// let input = "\na";
+    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
+    /// let mut result = state.match_string("\na");
+    /// assert!(result.is_ok());
+    /// assert_eq!(result.unwrap().position().line_of(), "a");
+    /// ```
+    #[inline]
+    pub fn line_of(&self) -> &'i str {
+        if self.pos > self.input.len() {
+            panic!("position out of bounds");
+        }
+        let (start, end) = (self.find_line_start(), self.find_line_end());
+        &self.input[start..end]
+    }
+
+    /// Returns the byte offset at which the line containing this `Position` starts.
+    ///
+    /// That is the offset just past the closest `'\n'` located strictly before `pos`, or `0`
+    /// when no `'\n'` precedes `pos` (first line, or empty input). Only `'\n'` is treated as a
+    /// line terminator here; a lone `'\r'` does not start a new line.
+    ///
+    /// A `Position` that sits on a `'\n'` still belongs to the line which that `'\n'` ends.
+    pub(crate) fn find_line_start(&self) -> usize {
+        // Position's pos is always a UTF-8 border, so slicing the prefix cannot panic.
+        // Searching backwards inside the prefix only looks at text in front of the cursor
+        // instead of decoding the whole input from its end.
+        self.input[..self.pos]
+            .rfind('\n')
+            // '\n' is one byte long, so the line begins right after it.
+            .map_or(0, |newline| newline + 1)
+    }
+
+    /// Returns the byte offset one past the end of the line containing this `Position`.
+    ///
+    /// The line ends with the first `'\n'` found at or after `pos`, and that `'\n'` is part of
+    /// the line, so the result is the offset right after it. When no `'\n'` follows, the line
+    /// runs to the end of the input and `input.len()` is returned (`0` for an empty input).
+    ///
+    /// Only `'\n'` is treated as a line terminator here.
+    pub(crate) fn find_line_end(&self) -> usize {
+        // Position's pos is always a UTF-8 border, so slicing the remainder cannot panic.
+        // Searching forwards from the cursor avoids decoding everything in front of it.
+        let rest = &self.input[self.pos..];
+
+        match rest.find('\n') {
+            // `offset` is relative to `pos`; add 1 to step over the one-byte '\n' itself.
+            Some(offset) => self.pos + offset + 1,
+            None => self.input.len(),
+        }
+    }
+
+    /// Returns `true` when this `Position` is at byte offset 0, the start of the input `&str`.
+    #[inline]
+    pub(crate) fn at_start(&self) -> bool {
+        self.pos == 0
+    }
+
+    /// Returns `true` when this `Position` is at byte offset `input.len()`, the end of the input.
+    #[inline]
+    pub(crate) fn at_end(&self) -> bool {
+        self.pos == self.input.len()
+    }
+
+    /// Moves the `Position` forward by `n` `char`s.
+    ///
+    /// Returns `true` when the input holds at least `n` more `char`s after the current offset.
+    /// Otherwise returns `false` and leaves `pos` untouched. Skipping `0` `char`s always
+    /// succeeds.
+    #[inline]
+    pub(crate) fn skip(&mut self, n: usize) -> bool {
+        if n == 0 {
+            return true;
+        }
+        // Position's pos is always a UTF-8 border.
+        let rest = &self.input[self.pos..];
+        // The `n`-th char ahead starts at `offset`; landing just behind it skips exactly `n`.
+        match rest.char_indices().nth(n - 1) {
+            Some((offset, ch)) => {
+                self.pos += offset + ch.len_utf8();
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Moves the `Position` backward by `n` `char`s.
+    ///
+    /// Returns `true` when at least `n` `char`s precede the current offset. Otherwise returns
+    /// `false` and leaves `pos` untouched. Going back `0` `char`s always succeeds and does not
+    /// move the `Position`.
+    #[inline]
+    pub(crate) fn skip_back(&mut self, n: usize) -> bool {
+        if n == 0 {
+            return true;
+        }
+        // Position's pos is always a UTF-8 border.
+        let before = &self.input[..self.pos];
+        // Walking backwards, the `n`-th char begins at the byte offset we want to land on.
+        match before.char_indices().rev().nth(n - 1) {
+            Some((offset, _)) => {
+                self.pos = offset;
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Advances the `Position` to the first place at or after the current offset where one of
+    /// the given `strings` begins, and returns `true`. If none of the `strings` occurs, returns
+    /// `false` and moves `pos` to the end of the input anyway.
+    /// Only offsets strictly before the end of the input that are UTF-8 borders are tested.
+    #[inline]
+    pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
+        let bytes = self.input.as_bytes();
+        let found = (self.pos..bytes.len())
+            // Only stop on UTF-8 borders so `pos` stays a valid index.
+            .filter(|&from| self.input.is_char_boundary(from))
+            .find(|&from| strings.iter().any(|s| bytes[from..].starts_with(s.as_bytes())));
+
+        match found {
+            Some(from) => {
+                self.pos = from;
+                true
+            }
+            None => {
+                self.pos = self.input.len();
+                false
+            }
+        }
+    }
+
+    /// Feeds the `char` at the `Position` to `f` and, when `f` returns `true`, steps over that
+    /// `char` and returns `true`. Returns `false` without moving at end of input or on rejection.
+    #[inline]
+    pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool
+    where
+        F: FnOnce(char) -> bool,
+    {
+        // Position's pos is always a UTF-8 border.
+        let c = match self.input[self.pos..].chars().next() {
+            Some(c) => c,
+            None => return false,
+        };
+        let accepted = f(c);
+        if accepted {
+            self.pos += c.len_utf8();
+        }
+        accepted
+    }
+
+    /// Matches `string` from the `Position` and returns `true` if a match was made or `false`
+    /// otherwise. If no match was made, `pos` will not be updated.
+    ///
+    /// The comparison is an exact, byte-for-byte (case-sensitive) prefix test.
+    #[inline]
+    pub(crate) fn match_string(&mut self, string: &str) -> bool {
+        // A matched `&str` prefix always ends on a UTF-8 border of the input.
+        let matched = self.input.as_bytes()[self.pos..].starts_with(string.as_bytes());
+        if matched {
+            self.pos += string.len();
+        }
+        matched
+    }
+
+    /// Case-insensitively matches `string` from the `Position` and returns `true` if a match was
+    /// made or `false` otherwise. If no match was made, `pos` will not be updated.
+    ///
+    /// Only ASCII letters are folded (`eq_ignore_ascii_case`); all other `char`s must be equal.
+    /// The candidate text is the next `string.len()` bytes of the input, so inputs that are too
+    /// short or would be cut inside a `char` never match.
+    #[inline]
+    pub(crate) fn match_insensitive(&mut self, string: &str) -> bool {
+        // Position's pos is always a UTF-8 border.
+        let rest = &self.input[self.pos..];
+        // `get` is `None` when `rest` is too short or would be cut in the middle of a `char`.
+        let matched = rest
+            .get(..string.len())
+            .map_or(false, |head| head.eq_ignore_ascii_case(string));
+
+        if matched {
+            self.pos += string.len();
+        }
+        matched
+    }
+
+    /// Matches the `char` at the `Position` against `range`, with BOTH `range.start` and
+    /// `range.end` inclusive. Steps over the `char` on a match; otherwise `pos` is unchanged.
+    #[inline]
+    pub(crate) fn match_range(&mut self, range: Range<char>) -> bool {
+        // Position's pos is always a UTF-8 border.
+        match self.input[self.pos..].chars().next() {
+            Some(c) if range.start <= c && c <= range.end => {
+                self.pos += c.len_utf8();
+                true
+            }
+            _ => false,
+        }
+    }
+}
+
+impl<'i> fmt::Debug for Position<'i> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("Position").field("pos", &self.pos).finish()
+    }
+}
+
+impl<'i> PartialEq for Position<'i> {
+    // Identity, not content: equal only for the very same input slice and the same offset.
+    fn eq(&self, other: &Position<'i>) -> bool {
+        self.pos == other.pos && ptr::eq(self.input, other.input)
+    }
+}
+
+impl<'i> Eq for Position<'i> {}
+
+impl<'i> PartialOrd for Position<'i> {
+    fn partial_cmp(&self, other: &Position<'i>) -> Option<Ordering> {
+        if !ptr::eq(self.input, other.input) {
+            return None;
+        }
+        Some(self.pos.cmp(&other.pos))
+    }
+}
+
+impl<'i> Ord for Position<'i> {
+    // Panics when the positions belong to different inputs (`partial_cmp` is `None` then).
+    fn cmp(&self, other: &Position<'i>) -> Ordering {
+        self.partial_cmp(other)
+            .expect("cannot compare positions from different strs")
+    }
+}
+
+impl<'i> Hash for Position<'i> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        (self.input as *const str, self.pos).hash(state);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    use super::*;
+
+    #[test]
+    fn empty() {
+        let input = "";
+        assert_eq!(Position::new(input, 0).unwrap().match_string(""), true);
+        assert_eq!(!Position::new(input, 0).unwrap().match_string("a"), true);
+    }
+
+    #[test]
+    fn parts() {
+        let input = "asdasdf";
+        // The second match starts at byte 3, right behind the first "asd".
+        assert_eq!(Position::new(input, 0).unwrap().match_string("asd"), true);
+        assert_eq!(Position::new(input, 3).unwrap().match_string("asdf"), true);
+    }
+
+    #[test]
+    fn line_col() {
+        let input = "a\rb\nc\r\nd嗨";
+        // Offsets are bytes: '嗨' spans bytes 8..11, and offset 6 sits between '\r' and '\n'.
+        assert_eq!(Position::new(input, 0).unwrap().line_col(), (1, 1));
+        assert_eq!(Position::new(input, 1).unwrap().line_col(), (1, 2));
+        assert_eq!(Position::new(input, 2).unwrap().line_col(), (1, 3));
+        assert_eq!(Position::new(input, 3).unwrap().line_col(), (1, 4));
+        assert_eq!(Position::new(input, 4).unwrap().line_col(), (2, 1));
+        assert_eq!(Position::new(input, 5).unwrap().line_col(), (2, 2));
+        assert_eq!(Position::new(input, 6).unwrap().line_col(), (2, 3));
+        assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1));
+        assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2));
+        assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3));
+    }
+
+    #[test]
+    fn line_of() {
+        let input = "a\rb\nc\r\nd嗨";
+        // Only '\n' ends a line here, and the returned line keeps its trailing '\n'.
+        assert_eq!(Position::new(input, 0).unwrap().line_of(), "a\rb\n");
+        assert_eq!(Position::new(input, 1).unwrap().line_of(), "a\rb\n");
+        assert_eq!(Position::new(input, 2).unwrap().line_of(), "a\rb\n");
+        assert_eq!(Position::new(input, 3).unwrap().line_of(), "a\rb\n");
+        assert_eq!(Position::new(input, 4).unwrap().line_of(), "c\r\n");
+        assert_eq!(Position::new(input, 5).unwrap().line_of(), "c\r\n");
+        assert_eq!(Position::new(input, 6).unwrap().line_of(), "c\r\n");
+        assert_eq!(Position::new(input, 7).unwrap().line_of(), "d嗨");
+        assert_eq!(Position::new(input, 8).unwrap().line_of(), "d嗨");
+        assert_eq!(Position::new(input, 11).unwrap().line_of(), "d嗨");
+    }
+
+    #[test]
+    fn line_of_empty() {
+        let input = "";
+
+        assert_eq!(Position::new(input, 0).unwrap().line_of(), "");
+    }
+
+    #[test]
+    fn line_of_new_line() {
+        let input = "\n";
+
+        assert_eq!(Position::new(input, 0).unwrap().line_of(), "\n");
+    }
+
+    #[test]
+    fn line_of_between_new_line() {
+        let input = "\n\n";
+
+        assert_eq!(Position::new(input, 1).unwrap().line_of(), "\n");
+    }
+
+    fn measure_skip(input: &str, pos: usize, n: usize) -> Option<usize> {
+        let mut p = Position::new(input, pos).unwrap();
+        if p.skip(n) {
+            Some(p.pos - pos)
+        } else {
+            None
+        }
+    }
+
+    #[test]
+    fn skip_empty() {
+        let input = "";
+
+        assert_eq!(measure_skip(input, 0, 0), Some(0));
+        assert_eq!(measure_skip(input, 0, 1), None);
+    }
+
+    #[test]
+    fn skip() {
+        let input = "d嗨";
+        // Skips are counted in `char`s but measured in bytes: 'd' is 1 byte, '嗨' is 3.
+        assert_eq!(measure_skip(input, 0, 0), Some(0));
+        assert_eq!(measure_skip(input, 0, 1), Some(1));
+        assert_eq!(measure_skip(input, 1, 1), Some(3));
+    }
+
+    #[test]
+    fn skip_until() {
+        let input = "ab ac";
+        let pos = Position::from_start(input);
+
+        let mut test_pos = pos.clone();
+        test_pos.skip_until(&["a", "b"]);
+        assert_eq!(test_pos.pos(), 0);
+
+        test_pos = pos.clone();
+        test_pos.skip_until(&["b"]);
+        assert_eq!(test_pos.pos(), 1);
+
+        test_pos = pos.clone();
+        test_pos.skip_until(&["ab"]);
+        assert_eq!(test_pos.pos(), 0);
+
+        test_pos = pos.clone();
+        test_pos.skip_until(&["ac", "z"]);
+        assert_eq!(test_pos.pos(), 3);
+
+        test_pos = pos.clone();
+        assert!(!test_pos.skip_until(&["z"]));
+        assert_eq!(test_pos.pos(), 5);
+    }
+
+    #[test]
+    fn match_range() {
+        let input = "b";
+        // `match_range` treats both ends of the `Range` as inclusive, hence 'b'..'b' matches.
+        assert_eq!(Position::new(input, 0).unwrap().match_range('a'..'c'), true);
+        assert_eq!(Position::new(input, 0).unwrap().match_range('b'..'b'), true);
+        assert_eq!(
+            !Position::new(input, 0).unwrap().match_range('a'..'a'),
+            true
+        );
+        assert_eq!(
+            !Position::new(input, 0).unwrap().match_range('c'..'c'),
+            true
+        );
+        assert_eq!(
+            Position::new(input, 0).unwrap().match_range('a'..'嗨'),
+            true
+        );
+    }
+
+    #[test]
+    fn match_insensitive() {
+        let input = "AsdASdF";
+
+        assert_eq!(
+            Position::new(input, 0).unwrap().match_insensitive("asd"),
+            true
+        );
+        assert_eq!(
+            Position::new(input, 3).unwrap().match_insensitive("asdf"),
+            true
+        );
+    }
+
+    #[test]
+    fn cmp() {
+        let input = "a";
+        let start = Position::from_start(input);
+        let mut end = start.clone();
+
+        assert!(end.skip(1));
+        let result = start.cmp(&end);
+
+        assert_eq!(result, Ordering::Less);
+    }
+
+    #[test]
+    #[should_panic]
+    fn cmp_panic() {
+        let input1 = "a";
+        let input2 = "b";
+        let pos1 = Position::from_start(input1);
+        let pos2 = Position::from_start(input2);
+        // Positions over different inputs have no ordering, so `cmp` must panic.
+        pos1.cmp(&pos2);
+    }
+
+    #[test]
+    fn hash() {
+        let input = "a";
+        let start = Position::from_start(input);
+        let mut positions = HashSet::new();
+        // Smoke test: a `Position` is usable as a `HashSet` key.
+        positions.insert(start);
+    }
+}