aboutsummaryrefslogtreecommitdiff
path: root/re2/prefilter.cc
diff options
context:
space:
mode:
Diffstat (limited to 're2/prefilter.cc')
-rw-r--r--re2/prefilter.cc115
1 files changed, 56 insertions, 59 deletions
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index 4b9c35d..b657357 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -2,36 +2,40 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "util/util.h"
#include "re2/prefilter.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
#include "re2/re2.h"
#include "re2/unicode_casefold.h"
#include "re2/walker-inl.h"
namespace re2 {
-static const int Trace = false;
+static const bool ExtraDebug = false;
-typedef set<string>::iterator SSIter;
-typedef set<string>::const_iterator ConstSSIter;
+typedef std::set<string>::iterator SSIter;
+typedef std::set<string>::const_iterator ConstSSIter;
-static int alloc_id = 100000; // Used for debugging.
// Initializes a Prefilter, allocating subs_ as necessary.
Prefilter::Prefilter(Op op) {
op_ = op;
subs_ = NULL;
if (op_ == AND || op_ == OR)
- subs_ = new vector<Prefilter*>;
-
- alloc_id_ = alloc_id++;
- VLOG(10) << "alloc_id: " << alloc_id_;
+ subs_ = new std::vector<Prefilter*>;
}
// Destroys a Prefilter.
Prefilter::~Prefilter() {
- VLOG(10) << "Deleted: " << alloc_id_;
if (subs_) {
- for (int i = 0; i < subs_->size(); i++)
+ for (size_t i = 0; i < subs_->size(); i++)
delete (*subs_)[i];
delete subs_;
subs_ = NULL;
@@ -45,7 +49,7 @@ Prefilter* Prefilter::Simplify() {
}
// Nothing left in the AND/OR.
- if (subs_->size() == 0) {
+ if (subs_->empty()) {
if (op_ == AND)
op_ = ALL; // AND of nothing is true
else
@@ -100,7 +104,7 @@ Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
// If a and b match op, merge their contents.
if (a->op() == op && b->op() == op) {
- for (int i = 0; i < b->subs()->size(); i++) {
+ for (size_t i = 0; i < b->subs()->size(); i++) {
Prefilter* bb = (*b->subs())[i];
a->subs()->push_back(bb);
}
@@ -136,7 +140,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
return AndOr(OR, a, b);
}
-static void SimplifyStringSet(set<string> *ss) {
+static void SimplifyStringSet(std::set<string> *ss) {
// Now make sure that the strings aren't redundant. For example, if
// we know "ab" is a required string, then it doesn't help at all to
// know that "abc" is also a required string, so delete "abc". This
@@ -157,7 +161,7 @@ static void SimplifyStringSet(set<string> *ss) {
}
}
-Prefilter* Prefilter::OrStrings(set<string>* ss) {
+Prefilter* Prefilter::OrStrings(std::set<string>* ss) {
SimplifyStringSet(ss);
Prefilter* or_prefilter = NULL;
if (!ss->empty()) {
@@ -175,7 +179,7 @@ static Rune ToLowerRune(Rune r) {
return r;
}
- CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
+ const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
if (f == NULL || r < f->lo)
return r;
return ApplyFold(f, r);
@@ -210,7 +214,7 @@ class Prefilter::Info {
static Info* Quest(Info* a);
static Info* EmptyString();
static Info* NoMatch();
- static Info* AnyChar();
+ static Info* AnyCharOrAnyByte();
static Info* CClass(CharClass* cc, bool latin1);
static Info* Literal(Rune r);
static Info* LiteralLatin1(Rune r);
@@ -222,14 +226,14 @@ class Prefilter::Info {
// Caller takes ownership of the Prefilter.
Prefilter* TakeMatch();
- set<string>& exact() { return exact_; }
+ std::set<string>& exact() { return exact_; }
bool is_exact() const { return is_exact_; }
class Walker;
private:
- set<string> exact_;
+ std::set<string> exact_;
// When is_exact_ is true, the strings that match
// are placed in exact_. When it is no longer an exact
@@ -265,18 +269,12 @@ Prefilter* Prefilter::Info::TakeMatch() {
// Format a Info in string form.
string Prefilter::Info::ToString() {
- if (this == NULL) {
- // Sometimes when iterating on children of a node,
- // some children might have NULL Info. Adding
- // the check here for NULL to take care of cases where
- // the caller is not checking.
- return "";
- }
-
if (is_exact_) {
int n = 0;
string s;
- for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) {
+ for (std::set<string>::iterator i = exact_.begin();
+ i != exact_.end();
+ ++i) {
if (n++ > 0)
s += ",";
s += *i;
@@ -291,16 +289,17 @@ string Prefilter::Info::ToString() {
}
// Add the strings from src to dst.
-static void CopyIn(const set<string>& src, set<string>* dst) {
+static void CopyIn(const std::set<string>& src,
+ std::set<string>* dst) {
for (ConstSSIter i = src.begin(); i != src.end(); ++i)
dst->insert(*i);
}
// Add the cross-product of a and b to dst.
// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const set<string>& a,
- const set<string>& b,
- set<string>* dst) {
+static void CrossProduct(const std::set<string>& a,
+ const std::set<string>& b,
+ std::set<string>* dst) {
for (ConstSSIter i = a.begin(); i != a.end(); ++i)
for (ConstSSIter j = b.begin(); j != b.end(); ++j)
dst->insert(*i + *j);
@@ -418,8 +417,8 @@ Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
return info;
}
-// Constructs Info for dot (any character).
-Prefilter::Info* Prefilter::Info::AnyChar() {
+// Constructs Info for dot (any character) or \C (any byte).
+Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
Prefilter::Info* info = new Prefilter::Info();
info->match_ = new Prefilter(ALL);
return info;
@@ -454,15 +453,15 @@ Prefilter::Info* Prefilter::Info::EmptyString() {
typedef CharClass::iterator CCIter;
Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
bool latin1) {
- if (Trace) {
- VLOG(0) << "CharClassInfo:";
+ if (ExtraDebug) {
+ LOG(ERROR) << "CharClassInfo:";
for (CCIter i = cc->begin(); i != cc->end(); ++i)
- VLOG(0) << " " << i->lo << "-" << i->hi;
+ LOG(ERROR) << " " << i->lo << "-" << i->hi;
}
// If the class is too large, it's okay to overestimate.
if (cc->size() > 10)
- return AnyChar();
+ return AnyCharOrAnyByte();
Prefilter::Info *a = new Prefilter::Info();
for (CCIter i = cc->begin(); i != cc->end(); ++i)
@@ -477,9 +476,8 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
a->is_exact_ = true;
- if (Trace) {
- VLOG(0) << " = " << a->ToString();
- }
+ if (ExtraDebug)
+ LOG(ERROR) << " = " << a->ToString();
return a;
}
@@ -500,15 +498,16 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
bool latin1() { return latin1_; }
private:
bool latin1_;
- DISALLOW_EVIL_CONSTRUCTORS(Walker);
+
+ Walker(const Walker&) = delete;
+ Walker& operator=(const Walker&) = delete;
};
Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
- if (Trace) {
- LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
- }
+ if (ExtraDebug)
+ LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
- bool latin1 = re->parse_flags() & Regexp::Latin1;
+ bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
Prefilter::Info::Walker w(latin1);
Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
@@ -608,7 +607,6 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
info = child_args[0];
for (int i = 1; i < nchild_args; i++)
info = Alt(info, child_args[i]);
- VLOG(10) << "Alt: " << info->ToString();
break;
case kRegexpStar:
@@ -624,8 +622,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
case kRegexpAnyChar:
+ case kRegexpAnyByte:
// Claim nothing, except that it's not empty.
- info = AnyChar();
+ info = AnyCharOrAnyByte();
break;
case kRegexpCharClass:
@@ -638,10 +637,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
}
- if (Trace) {
- VLOG(0) << "BuildInfo " << re->ToString()
- << ": " << info->ToString();
- }
+ if (ExtraDebug)
+ LOG(ERROR) << "BuildInfo " << re->ToString()
+ << ": " << (info ? info->ToString() : "");
return info;
}
@@ -665,9 +663,6 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
}
string Prefilter::DebugString() const {
- if (this == NULL)
- return "<nil>";
-
switch (op_) {
default:
LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
@@ -680,19 +675,21 @@ string Prefilter::DebugString() const {
return "";
case AND: {
string s = "";
- for (int i = 0; i < subs_->size(); i++) {
+ for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += " ";
- s += (*subs_)[i]->DebugString();
+ Prefilter* sub = (*subs_)[i];
+ s += sub ? sub->DebugString() : "<nil>";
}
return s;
}
case OR: {
string s = "(";
- for (int i = 0; i < subs_->size(); i++) {
+ for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += "|";
- s += (*subs_)[i]->DebugString();
+ Prefilter* sub = (*subs_)[i];
+ s += sub ? sub->DebugString() : "<nil>";
}
s += ")";
return s;