aboutsummaryrefslogtreecommitdiff
path: root/re2/testing/exhaustive2_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 're2/testing/exhaustive2_test.cc')
-rw-r--r--re2/testing/exhaustive2_test.cc70
1 files changed, 70 insertions, 0 deletions
diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc
new file mode 100644
index 0000000..c5fec5b
--- /dev/null
+++ b/re2/testing/exhaustive2_test.cc
@@ -0,0 +1,70 @@
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+#include "util/test.h"
+#include "re2/re2.h"
+#include "re2/testing/exhaustive_tester.h"
+
+DECLARE_string(regexp_engines);
+
+namespace re2 {
+
+// Test empty string matches (aka "(?:)")
+TEST(EmptyString, Exhaustive) {
+ ExhaustiveTest(2, 2, Split(" ", "(?:) a"),
+ RegexpGenerator::EgrepOps(),
+ 5, Split("", "ab"), "", "");
+}
+
+// Test escaped versions of regexp syntax.
+TEST(Punctuation, Literals) {
+ vector<string> alphabet = Explode("()*+?{}[]\\^$.");
+ vector<string> escaped = alphabet;
+ for (int i = 0; i < escaped.size(); i++)
+ escaped[i] = "\\" + escaped[i];
+ ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
+ 2, alphabet, "", "");
+}
+
+// Test ^ $ . \A \z in presence of line endings.
+// Have to wrap the empty-width ones in (?:) so that
+// they can be repeated -- PCRE rejects ^* but allows (?:^)*
+TEST(LineEnds, Exhaustive) {
+ ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),
+ RegexpGenerator::EgrepOps(),
+ 4, Explode("ab\n"), "", "");
+}
+
+// Test what does and does not match \n.
+// This would be a good test, except that PCRE seems to have a bug:
+// in single-byte character set mode (the default),
+// [^a] matches \n, but in UTF-8 mode it does not.
+// So when we run the test, the tester complains that
+// we don't agree with PCRE, but it's PCRE that is at fault.
+// For what it's worth, Perl gets this right (matches
+// regardless of whether UTF-8 input is selected):
+//
+// #!/usr/bin/perl
+// use POSIX qw(locale_h);
+// print "matches in latin1\n" if "\n" =~ /[^a]/;
+// setlocale("en_US.utf8");
+// print "matches in utf8\n" if "\n" =~ /[^a]/;
+//
+// The rule chosen for RE2 is that by default, like Perl,
+// dot does not match \n but negated character classes [^a] do.
+// (?s) will allow dot to match \n; there is no way in RE2
+// to stop [^a] from matching \n, though the underlying library
+// provides a mechanism, and RE2 could add new syntax if needed.
+//
+// TEST(Newlines, Exhaustive) {
+// vector<string> empty_vector;
+// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
+// RegexpGenerator::EgrepOps(),
+// 4, Explode("a\n"), "");
+// }
+
+} // namespace re2
+