aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compile.rs11
-rw-r--r--src/exec.rs5
-rw-r--r--src/lib.rs1
-rw-r--r--src/literal/imp.rs3
-rw-r--r--src/re_builder.rs2
-rw-r--r--src/re_bytes.rs31
-rw-r--r--src/re_set.rs7
-rw-r--r--src/re_trait.rs27
-rw-r--r--src/re_unicode.rs31
9 files changed, 111 insertions, 7 deletions
diff --git a/src/compile.rs b/src/compile.rs
index cdc583c..9ffd347 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -1,4 +1,5 @@
use std::collections::HashMap;
+use std::fmt;
use std::iter;
use std::result;
use std::sync::Arc;
@@ -25,6 +26,9 @@ struct Patch {
/// A compiler translates a regular expression AST to a sequence of
/// instructions. The sequence of instructions represents an NFA.
+// `Compiler` is only public via the `internal` module, so avoid deriving
+// `Debug`.
+#[allow(missing_debug_implementations)]
pub struct Compiler {
insts: Vec<MaybeInst>,
compiled: Program,
@@ -1051,6 +1055,7 @@ impl<'a, 'b> CompileClass<'a, 'b> {
/// This uses similar idea to [`SparseSet`](../sparse/struct.SparseSet.html),
/// except it uses hashes as original indices and then compares full keys for
/// validation against `dense` array.
+#[derive(Debug)]
struct SuffixCache {
sparse: Box<[usize]>,
dense: Vec<SuffixCacheEntry>,
@@ -1159,6 +1164,12 @@ impl ByteClassSet {
}
}
+impl fmt::Debug for ByteClassSet {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_tuple("ByteClassSet").field(&&self.0[..]).finish()
+ }
+}
+
fn u32_to_usize(n: u32) -> usize {
// In case usize is less than 32 bits, we need to guard against overflow.
// On most platforms this compiles to nothing.
diff --git a/src/exec.rs b/src/exec.rs
index acca2dc..e1aae87 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -30,6 +30,7 @@ use utf8::next_utf8;
/// In particular, this manages the various compiled forms of a single regular
/// expression and the choice of which matching engine to use to execute a
/// regular expression.
+#[derive(Debug)]
pub struct Exec {
/// All read only state.
ro: Arc<ExecReadOnly>,
@@ -49,6 +50,7 @@ pub struct ExecNoSync<'c> {
}
/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
+#[derive(Debug)]
pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
@@ -97,6 +99,9 @@ struct ExecReadOnly {
/// Facilitates the construction of an executor by exposing various knobs
/// to control how a regex is executed and what kinds of resources it's
/// permitted to use.
+// `ExecBuilder` is only public via the `internal` module, so avoid deriving
+// `Debug`.
+#[allow(missing_debug_implementations)]
pub struct ExecBuilder {
options: RegexOptions,
match_type: Option<MatchType>,
diff --git a/src/lib.rs b/src/lib.rs
index bdcebd4..d3dc58d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -616,6 +616,7 @@ another matching engine with fixed memory requirements.
#![deny(missing_docs)]
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "pattern", feature(pattern))]
+#![warn(missing_debug_implementations)]
#[cfg(not(feature = "std"))]
compile_error!("`std` feature is currently required to build this crate");
diff --git a/src/literal/imp.rs b/src/literal/imp.rs
index fe07ffc..e4d04ed 100644
--- a/src/literal/imp.rs
+++ b/src/literal/imp.rs
@@ -72,7 +72,7 @@ impl LiteralSearcher {
/// Returns true if all matches comprise the entire regular expression.
///
/// This does not necessarily mean that a literal match implies a match
- /// of the regular expression. For example, the regular expresison `^a`
+ /// of the regular expression. For example, the regular expression `^a`
/// is comprised of a single complete literal `a`, but the regular
/// expression demands that it only match at the beginning of a string.
pub fn complete(&self) -> bool {
@@ -232,6 +232,7 @@ impl Matcher {
}
}
+#[derive(Debug)]
pub enum LiteralIter<'a> {
Empty,
Bytes(&'a [u8]),
diff --git a/src/re_builder.rs b/src/re_builder.rs
index 3fef99d..fc140f8 100644
--- a/src/re_builder.rs
+++ b/src/re_builder.rs
@@ -47,6 +47,7 @@ macro_rules! define_builder {
/// A builder can be used to configure how the regex is built, for example, by
/// setting the default flags (which can be overridden in the expression
/// itself) or setting various limits.
+ #[derive(Debug)]
pub struct RegexBuilder(RegexOptions);
impl RegexBuilder {
@@ -244,6 +245,7 @@ macro_rules! define_set_builder {
/// A builder can be used to configure how the regexes are built, for example,
/// by setting the default flags (which can be overridden in the expression
/// itself) or setting various limits.
+ #[derive(Debug)]
pub struct RegexSetBuilder(RegexOptions);
impl RegexSetBuilder {
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index ca01e0e..a091436 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -1,6 +1,7 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
+use std::iter::FusedIterator;
use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
@@ -690,6 +691,7 @@ impl Regex {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched byte string.
+#[derive(Debug)]
pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>);
impl<'r, 't> Iterator for Matches<'r, 't> {
@@ -701,6 +703,8 @@ impl<'r, 't> Iterator for Matches<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for Matches<'r, 't> {}
+
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
@@ -708,6 +712,7 @@ impl<'r, 't> Iterator for Matches<'r, 't> {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched byte string.
+#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(
re_trait::CaptureMatches<'t, ExecNoSync<'r>>,
);
@@ -724,10 +729,13 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
+
/// Yields all substrings delimited by a regular expression match.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
+#[derive(Debug)]
pub struct Split<'r, 't> {
finder: Matches<'r, 't>,
last: usize,
@@ -757,12 +765,15 @@ impl<'r, 't> Iterator for Split<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for Split<'r, 't> {}
+
/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
+#[derive(Debug)]
pub struct SplitN<'r, 't> {
splits: Split<'r, 't>,
n: usize,
@@ -790,14 +801,21 @@ impl<'r, 't> Iterator for SplitN<'r, 't> {
Some(&text[self.splits.last..])
}
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.n))
+ }
}
+impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
+
/// An iterator over the names of all possible captures.
///
/// `None` indicates an unnamed capture; the first element (capture 0, the
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
+#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
impl<'r> Iterator for CaptureNames<'r> {
@@ -813,8 +831,16 @@ impl<'r> Iterator for CaptureNames<'r> {
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
+
+ fn count(self) -> usize {
+ self.0.count()
+ }
}
+impl<'r> ExactSizeIterator for CaptureNames<'r> {}
+
+impl<'r> FusedIterator for CaptureNames<'r> {}
+
/// CaptureLocations is a low level representation of the raw offsets of each
/// submatch.
///
@@ -1057,7 +1083,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
@@ -1073,6 +1099,8 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
}
}
+impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
+
/// Replacer describes types that can be used to replace matches in a byte
/// string.
///
@@ -1173,6 +1201,7 @@ where
/// and performant (since capture groups don't need to be found).
///
/// `'t` is the lifetime of the literal text.
+#[derive(Clone, Debug)]
pub struct NoExpand<'t>(pub &'t [u8]);
impl<'t> Replacer for NoExpand<'t> {
diff --git a/src/re_set.rs b/src/re_set.rs
index b8954be..0a00229 100644
--- a/src/re_set.rs
+++ b/src/re_set.rs
@@ -320,6 +320,7 @@ impl<'a> IntoIterator for &'a SetMatches {
/// This will always produces matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
+#[derive(Debug)]
pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);
impl Iterator for SetMatchesIntoIter {
@@ -352,6 +353,8 @@ impl DoubleEndedIterator for SetMatchesIntoIter {
}
}
+impl iter::FusedIterator for SetMatchesIntoIter {}
+
/// A borrowed iterator over the set of matches from a regex set.
///
/// The lifetime `'a` refers to the lifetime of a `SetMatches` value.
@@ -359,7 +362,7 @@ impl DoubleEndedIterator for SetMatchesIntoIter {
/// This will always produces matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);
impl<'a> Iterator for SetMatchesIter<'a> {
@@ -392,6 +395,8 @@ impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
}
}
+impl<'a> iter::FusedIterator for SetMatchesIter<'a> {}
+
#[doc(hidden)]
impl From<Exec> for RegexSet {
fn from(exec: Exec) -> Self {
diff --git a/src/re_trait.rs b/src/re_trait.rs
index d14a9f7..ea6be9c 100644
--- a/src/re_trait.rs
+++ b/src/re_trait.rs
@@ -1,3 +1,6 @@
+use std::fmt;
+use std::iter::FusedIterator;
+
/// Slot is a single saved capture location. Note that there are two slots for
/// every capture in a regular expression (one slot each for the start and end
/// of the capture).
@@ -51,7 +54,7 @@ impl Locations {
/// Positions are byte indices in terms of the original string matched.
///
/// `'c` is the lifetime of the captures.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct SubCapturesPosIter<'c> {
idx: usize,
locs: &'c Locations,
@@ -73,6 +76,8 @@ impl<'c> Iterator for SubCapturesPosIter<'c> {
}
}
+impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
+
/// `RegularExpression` describes types that can implement regex searching.
///
/// This trait is my attempt at reducing code duplication and to standardize
@@ -85,9 +90,9 @@ impl<'c> Iterator for SubCapturesPosIter<'c> {
/// somewhat reasonable. One particular thing this trait would expose would be
/// the ability to start the search of a regex anywhere in a haystack, which
/// isn't possible in the current public API.
-pub trait RegularExpression: Sized {
+pub trait RegularExpression: Sized + fmt::Debug {
/// The type of the haystack.
- type Text: ?Sized;
+ type Text: ?Sized + fmt::Debug;
/// The number of capture slots in the compiled regular expression. This is
/// always two times the number of capture groups (two slots per group).
@@ -145,6 +150,7 @@ pub trait RegularExpression: Sized {
}
/// An iterator over all non-overlapping successive leftmost-first matches.
+#[derive(Debug)]
pub struct Matches<'t, R>
where
R: RegularExpression,
@@ -205,8 +211,16 @@ where
}
}
+impl<'t, R> FusedIterator for Matches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't + AsRef<[u8]>,
+{
+}
+
/// An iterator over all non-overlapping successive leftmost-first matches with
/// captures.
+#[derive(Debug)]
pub struct CaptureMatches<'t, R>(Matches<'t, R>)
where
R: RegularExpression,
@@ -260,3 +274,10 @@ where
Some(locs)
}
}
+
+impl<'t, R> FusedIterator for CaptureMatches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't + AsRef<[u8]>,
+{
+}
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index ea95c1b..df87c34 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -1,6 +1,7 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
+use std::iter::FusedIterator;
use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
@@ -747,6 +748,7 @@ impl Regex {
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
+#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
impl<'r> Iterator for CaptureNames<'r> {
@@ -762,12 +764,21 @@ impl<'r> Iterator for CaptureNames<'r> {
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
+
+ fn count(self) -> usize {
+ self.0.count()
+ }
}
+impl<'r> ExactSizeIterator for CaptureNames<'r> {}
+
+impl<'r> FusedIterator for CaptureNames<'r> {}
+
/// Yields all substrings delimited by a regular expression match.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
+#[derive(Debug)]
pub struct Split<'r, 't> {
finder: Matches<'r, 't>,
last: usize,
@@ -797,12 +808,15 @@ impl<'r, 't> Iterator for Split<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for Split<'r, 't> {}
+
/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
+#[derive(Debug)]
pub struct SplitN<'r, 't> {
splits: Split<'r, 't>,
n: usize,
@@ -830,8 +844,14 @@ impl<'r, 't> Iterator for SplitN<'r, 't> {
Some(&text[self.splits.last..])
}
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.n))
+ }
}
+impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
+
/// CaptureLocations is a low level representation of the raw offsets of each
/// submatch.
///
@@ -1059,7 +1079,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
@@ -1075,6 +1095,8 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
}
}
+impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
+
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
@@ -1082,6 +1104,7 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
+#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(
re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>,
);
@@ -1098,6 +1121,8 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
+
/// An iterator over all non-overlapping matches for a particular string.
///
/// The iterator yields a `Match` value. The iterator stops when no more
@@ -1105,6 +1130,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
+#[derive(Debug)]
pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>);
impl<'r, 't> Iterator for Matches<'r, 't> {
@@ -1116,6 +1142,8 @@ impl<'r, 't> Iterator for Matches<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for Matches<'r, 't> {}
+
/// Replacer describes types that can be used to replace matches in a string.
///
/// In general, users of this crate shouldn't need to implement this trait,
@@ -1215,6 +1243,7 @@ where
/// and performant (since capture groups don't need to be found).
///
/// `'t` is the lifetime of the literal text.
+#[derive(Clone, Debug)]
pub struct NoExpand<'t>(pub &'t str);
impl<'t> Replacer for NoExpand<'t> {