diff options
Diffstat (limited to 're2/testing/exhaustive2_test.cc')
-rw-r--r-- | re2/testing/exhaustive2_test.cc | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc new file mode 100644 index 0000000..c5fec5b --- /dev/null +++ b/re2/testing/exhaustive2_test.cc @@ -0,0 +1,70 @@ +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Exhaustive testing of regular expression matching. + +#include "util/test.h" +#include "re2/re2.h" +#include "re2/testing/exhaustive_tester.h" + +DECLARE_string(regexp_engines); + +namespace re2 { + +// Test empty string matches (aka "(?:)") +TEST(EmptyString, Exhaustive) { + ExhaustiveTest(2, 2, Split(" ", "(?:) a"), + RegexpGenerator::EgrepOps(), + 5, Split("", "ab"), "", ""); +} + +// Test escaped versions of regexp syntax. +TEST(Punctuation, Literals) { + vector<string> alphabet = Explode("()*+?{}[]\\^$."); + vector<string> escaped = alphabet; + for (int i = 0; i < escaped.size(); i++) + escaped[i] = "\\" + escaped[i]; + ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(), + 2, alphabet, "", ""); +} + +// Test ^ $ . \A \z in presence of line endings. +// Have to wrap the empty-width ones in (?:) so that +// they can be repeated -- PCRE rejects ^* but allows (?:^)* +TEST(LineEnds, Exhaustive) { + ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"), + RegexpGenerator::EgrepOps(), + 4, Explode("ab\n"), "", ""); +} + +// Test what does and does not match \n. +// This would be a good test, except that PCRE seems to have a bug: +// in single-byte character set mode (the default), +// [^a] matches \n, but in UTF-8 mode it does not. +// So when we run the test, the tester complains that +// we don't agree with PCRE, but it's PCRE that is at fault. +// For what it's worth, Perl gets this right (matches +// regardless of whether UTF-8 input is selected): +// +// #!/usr/bin/perl +// use POSIX qw(locale_h); +// print "matches in latin1\n" if "\n" =~ /[^a]/; +// setlocale("en_US.utf8"); +// print "matches in utf8\n" if "\n" =~ /[^a]/; +// +// The rule chosen for RE2 is that by default, like Perl, +// dot does not match \n but negated character classes [^a] do. +// (?s) will allow dot to match \n; there is no way in RE2 +// to stop [^a] from matching \n, though the underlying library +// provides a mechanism, and RE2 could add new syntax if needed. +// +// TEST(Newlines, Exhaustive) { +// vector<string> empty_vector; +// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"), +// RegexpGenerator::EgrepOps(), +// 4, Explode("a\n"), ""); +// } + +} // namespace re2 + |