diff options
Diffstat (limited to 're2/prefilter.cc')
-rw-r--r-- | re2/prefilter.cc | 115 |
1 files changed, 56 insertions, 59 deletions
diff --git a/re2/prefilter.cc b/re2/prefilter.cc index 4b9c35d..b657357 100644 --- a/re2/prefilter.cc +++ b/re2/prefilter.cc @@ -2,36 +2,40 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "util/util.h" #include "re2/prefilter.h" + +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/re2.h" #include "re2/unicode_casefold.h" #include "re2/walker-inl.h" namespace re2 { -static const int Trace = false; +static const bool ExtraDebug = false; -typedef set<string>::iterator SSIter; -typedef set<string>::const_iterator ConstSSIter; +typedef std::set<string>::iterator SSIter; +typedef std::set<string>::const_iterator ConstSSIter; -static int alloc_id = 100000; // Used for debugging. // Initializes a Prefilter, allocating subs_ as necessary. Prefilter::Prefilter(Op op) { op_ = op; subs_ = NULL; if (op_ == AND || op_ == OR) - subs_ = new vector<Prefilter*>; - - alloc_id_ = alloc_id++; - VLOG(10) << "alloc_id: " << alloc_id_; + subs_ = new std::vector<Prefilter*>; } // Destroys a Prefilter. Prefilter::~Prefilter() { - VLOG(10) << "Deleted: " << alloc_id_; if (subs_) { - for (int i = 0; i < subs_->size(); i++) + for (size_t i = 0; i < subs_->size(); i++) delete (*subs_)[i]; delete subs_; subs_ = NULL; @@ -45,7 +49,7 @@ Prefilter* Prefilter::Simplify() { } // Nothing left in the AND/OR. - if (subs_->size() == 0) { + if (subs_->empty()) { if (op_ == AND) op_ = ALL; // AND of nothing is true else @@ -100,7 +104,7 @@ Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { // If a and b match op, merge their contents. if (a->op() == op && b->op() == op) { - for (int i = 0; i < b->subs()->size(); i++) { + for (size_t i = 0; i < b->subs()->size(); i++) { Prefilter* bb = (*b->subs())[i]; a->subs()->push_back(bb); } @@ -136,7 +140,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { return AndOr(OR, a, b); } -static void SimplifyStringSet(set<string> *ss) { +static void SimplifyStringSet(std::set<string> *ss) { // Now make sure that the strings aren't redundant. For example, if // we know "ab" is a required string, then it doesn't help at all to // know that "abc" is also a required string, so delete "abc". This @@ -157,7 +161,7 @@ static void SimplifyStringSet(set<string> *ss) { } } -Prefilter* Prefilter::OrStrings(set<string>* ss) { +Prefilter* Prefilter::OrStrings(std::set<string>* ss) { SimplifyStringSet(ss); Prefilter* or_prefilter = NULL; if (!ss->empty()) { @@ -175,7 +179,7 @@ static Rune ToLowerRune(Rune r) { return r; } - CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); + const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); if (f == NULL || r < f->lo) return r; return ApplyFold(f, r); @@ -210,7 +214,7 @@ class Prefilter::Info { static Info* Quest(Info* a); static Info* EmptyString(); static Info* NoMatch(); - static Info* AnyChar(); + static Info* AnyCharOrAnyByte(); static Info* CClass(CharClass* cc, bool latin1); static Info* Literal(Rune r); static Info* LiteralLatin1(Rune r); @@ -222,14 +226,14 @@ class Prefilter::Info { // Caller takes ownership of the Prefilter. Prefilter* TakeMatch(); - set<string>& exact() { return exact_; } + std::set<string>& exact() { return exact_; } bool is_exact() const { return is_exact_; } class Walker; private: - set<string> exact_; + std::set<string> exact_; // When is_exact_ is true, the strings that match // are placed in exact_. When it is no longer an exact @@ -265,18 +269,12 @@ Prefilter* Prefilter::Info::TakeMatch() { // Format a Info in string form. string Prefilter::Info::ToString() { - if (this == NULL) { - // Sometimes when iterating on children of a node, - // some children might have NULL Info. Adding - // the check here for NULL to take care of cases where - // the caller is not checking. - return ""; - } - if (is_exact_) { int n = 0; string s; - for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) { + for (std::set<string>::iterator i = exact_.begin(); + i != exact_.end(); + ++i) { if (n++ > 0) s += ","; s += *i; @@ -291,16 +289,17 @@ string Prefilter::Info::ToString() { } // Add the strings from src to dst. -static void CopyIn(const set<string>& src, set<string>* dst) { +static void CopyIn(const std::set<string>& src, + std::set<string>* dst) { for (ConstSSIter i = src.begin(); i != src.end(); ++i) dst->insert(*i); } // Add the cross-product of a and b to dst. // (For each string i in a and j in b, add i+j.) -static void CrossProduct(const set<string>& a, - const set<string>& b, - set<string>* dst) { +static void CrossProduct(const std::set<string>& a, + const std::set<string>& b, + std::set<string>* dst) { for (ConstSSIter i = a.begin(); i != a.end(); ++i) for (ConstSSIter j = b.begin(); j != b.end(); ++j) dst->insert(*i + *j); @@ -418,8 +417,8 @@ Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { return info; } -// Constructs Info for dot (any character). -Prefilter::Info* Prefilter::Info::AnyChar() { +// Constructs Info for dot (any character) or \C (any byte). +Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() { Prefilter::Info* info = new Prefilter::Info(); info->match_ = new Prefilter(ALL); return info; @@ -454,15 +453,15 @@ Prefilter::Info* Prefilter::Info::EmptyString() { typedef CharClass::iterator CCIter; Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, bool latin1) { - if (Trace) { - VLOG(0) << "CharClassInfo:"; + if (ExtraDebug) { + LOG(ERROR) << "CharClassInfo:"; for (CCIter i = cc->begin(); i != cc->end(); ++i) - VLOG(0) << " " << i->lo << "-" << i->hi; + LOG(ERROR) << " " << i->lo << "-" << i->hi; } // If the class is too large, it's okay to overestimate. if (cc->size() > 10) - return AnyChar(); + return AnyCharOrAnyByte(); Prefilter::Info *a = new Prefilter::Info(); for (CCIter i = cc->begin(); i != cc->end(); ++i) @@ -477,9 +476,8 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, a->is_exact_ = true; - if (Trace) { - VLOG(0) << " = " << a->ToString(); - } + if (ExtraDebug) + LOG(ERROR) << " = " << a->ToString(); return a; } @@ -500,15 +498,16 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { bool latin1() { return latin1_; } private: bool latin1_; - DISALLOW_EVIL_CONSTRUCTORS(Walker); + + Walker(const Walker&) = delete; + Walker& operator=(const Walker&) = delete; }; Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { - if (Trace) { - LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); - } + if (ExtraDebug) + LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString(); - bool latin1 = re->parse_flags() & Regexp::Latin1; + bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; Prefilter::Info::Walker w(latin1); Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); @@ -608,7 +607,6 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( info = child_args[0]; for (int i = 1; i < nchild_args; i++) info = Alt(info, child_args[i]); - VLOG(10) << "Alt: " << info->ToString(); break; case kRegexpStar: @@ -624,8 +622,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; case kRegexpAnyChar: + case kRegexpAnyByte: // Claim nothing, except that it's not empty. - info = AnyChar(); + info = AnyCharOrAnyByte(); break; case kRegexpCharClass: @@ -638,10 +637,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; } - if (Trace) { - VLOG(0) << "BuildInfo " << re->ToString() - << ": " << info->ToString(); - } + if (ExtraDebug) + LOG(ERROR) << "BuildInfo " << re->ToString() + << ": " << (info ? info->ToString() : ""); return info; } @@ -665,9 +663,6 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) { } string Prefilter::DebugString() const { - if (this == NULL) - return "<nil>"; - switch (op_) { default: LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; @@ -680,19 +675,21 @@ string Prefilter::DebugString() const { return ""; case AND: { string s = ""; - for (int i = 0; i < subs_->size(); i++) { + for (size_t i = 0; i < subs_->size(); i++) { if (i > 0) s += " "; - s += (*subs_)[i]->DebugString(); + Prefilter* sub = (*subs_)[i]; + s += sub ? sub->DebugString() : "<nil>"; } return s; } case OR: { string s = "("; - for (int i = 0; i < subs_->size(); i++) { + for (size_t i = 0; i < subs_->size(); i++) { if (i > 0) s += "|"; - s += (*subs_)[i]->DebugString(); + Prefilter* sub = (*subs_)[i]; + s += sub ? sub->DebugString() : "<nil>"; } s += ")"; return s; |