diff options
-rw-r--r-- | re2/bitstate.cc | 2 | ||||
-rw-r--r-- | re2/nfa.cc | 3 | ||||
-rw-r--r-- | re2/parse.cc | 8 | ||||
-rw-r--r-- | re2/testing/re2_test.cc | 45 |
4 files changed, 57 insertions, 1 deletion
diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 518d642..8ced6ea 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -170,6 +170,8 @@ bool BitState::TrySearch(int id0, const char* p0) { Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { case kInstFail: + return false; + default: LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg; return false; @@ -529,7 +529,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, break; case kInstCapture: - match_[ip->cap()] = p; + if (ip->cap() < ncapture_) + match_[ip->cap()] = p; id = ip->out(); continue; diff --git a/re2/parse.cc b/re2/parse.cc index 56e793a..815a31f 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -1188,6 +1188,14 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { int n; if (fullrune(sp->data(), sp->size())) { n = chartorune(r, sp->data()); + // Some copies of chartorune have a bug that accepts + // encodings of values in (10FFFF, 1FFFFF] as valid. + // Those values break the character class algorithm, + // which assumes Runemax is the largest rune. + if (*r > Runemax) { + n = 1; + *r = Runeerror; + } if (!(n == 1 && *r == Runeerror)) { // no decoding error sp->remove_prefix(n); return n; diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc index 0598f85..d80f313 100644 --- a/re2/testing/re2_test.cc +++ b/re2/testing/re2_test.cc @@ -1430,4 +1430,49 @@ TEST(RE2, Bug10131674) { EXPECT_FALSE(RE2::FullMatch("hello world", re)); } +TEST(RE2, Bug18391750) { + // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer. 
+ char t[] = {0x28, 0x28, 0xfc, 0xfc, 0x8, 0x8, 0x26, 0x26, 0x28, 0xc2, 0x9b, 0xc5, 0xc5, 0xd4, 0x8f, 0x8f, 0x69, 0x69, 0xe7, 0x29, 0x7b, 0x37, 0x31, 0x31, 0x7d, 0xae, 0x7c, 0x7c, 0xf3, 0x29, 0xae, 0xae, 0x2e, 0x2a, 0x29, 0x0}; + RE2::Options opt; + opt.set_encoding(RE2::Options::EncodingLatin1); + opt.set_longest_match(true); + opt.set_dot_nl(true); + opt.set_case_sensitive(false); + RE2 re(t, opt); + CHECK(re.ok()); + RE2::PartialMatch(t, re); +} + +TEST(RE2, Bug18458852) { + // Bug in parser accepting invalid (too large) rune, + // causing compiler to fail in DCHECK in UTF-8 + // character class code. + char b[] = {0x28, 0x5, 0x5, 0x41, 0x41, 0x28, 0x24, 0x5b, 0x5e, 0xf5, 0x87, 0x87, 0x90, 0x29, 0x5d, 0x29, 0x29, 0x0}; + RE2 re(b); + CHECK(!re.ok()); +} + +TEST(RE2, Bug18523943) { + // Bug in bitstate: case kInstFail was merged into the default with LOG(DFATAL). + + RE2::Options opt; + char a[] = {0x29, 0x29, 0x24, 0x0}; + char b[] = {0x28, 0xa, 0x2a, 0x2a, 0x29, 0x0}; + opt.set_log_errors(false); + opt.set_encoding(RE2::Options::EncodingLatin1); + opt.set_posix_syntax(true); + opt.set_longest_match(true); + opt.set_literal(false); + opt.set_never_nl(true); + + RE2 re((const char*)b, opt); + CHECK(re.ok()); + string s1; + CHECK(!RE2::PartialMatch((const char*)a, re, &s1)); +} + } // namespace re2 |