// path: root/re2/testing/tester.h
// blob: 6e16e7757f13c2e888e4f82b09fd865e83148527
// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Comparative tester for regular expression matching.
// Checks all implementations against each other.

#ifndef RE2_TESTING_TESTER_H__
#define RE2_TESTING_TESTER_H__

#include <vector>

#include "re2/stringpiece.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "util/pcre.h"

namespace re2 {

class Regexp;

// Every regexp engine the tester knows how to drive.
// Values are assigned consecutively starting at zero.
enum Engine {
  // Prog::BadSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  // Sentinel: one past the last engine, i.e. the number of engines above.
  kEngineMax,
};

// Arithmetic helpers that keep results typed as Engine.
// C++ supplies no ++ for enums, and a plain e+1 has type int.
//
// Postfix increment: steps e to the next enumerator value in place.
// The int parameter only selects the postfix form and is never read.
// Returns void, so the expression `e++` yields no value.
static inline void operator++(Engine& e, int unused) {
  const int next = static_cast<int>(e) + 1;
  e = static_cast<Engine>(next);
}

// Adds the integer offset i to e and returns the result as an Engine
// rather than as an int.  No range check is made on the result.
static inline Engine operator+(Engine e, int i) {
  const int shifted = static_cast<int>(e) + i;
  return static_cast<Engine>(shifted);
}

// A TestInstance caches per-regexp state for a given
// regular expression in a given configuration
// (UTF-8 vs Latin1, longest vs first match, etc.).
class TestInstance {
 public:
  struct Result;

  // Prepares the cached state for regexp, using the given match kind and
  // parse flags.  Call error() afterwards to find out whether
  // construction failed.
  TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
               Regexp::ParseFlags flags);
  ~TestInstance();

  // Returns the parse flags this instance was created with.
  Regexp::ParseFlags flags() const { return flags_; }

  // Returns whether an error occurred during construction.
  bool error() const { return error_; }

  // Runs a single test case: search in text, which is in context,
  // using the given anchoring.
  // NOTE(review): the bool result presumably means "all engines agreed";
  // confirm against the definition.
  bool RunCase(const StringPiece& text, const StringPiece& context,
               Prog::Anchor anchor);

 private:
  // Runs a single search using the named engine type,
  // storing the outcome in *result.
  void RunSearch(Engine type,
                 const StringPiece& text, const StringPiece& context,
                 Prog::Anchor anchor,
                 Result *result);

  // Logs a description of one search (engine e on text within context
  // with the given anchoring), introduced by prefix.
  void LogMatch(const char* prefix, Engine e, const StringPiece& text,
                const StringPiece& context, Prog::Anchor anchor);

  // Held by value, not by reference: a reference member would be left
  // dangling whenever the constructor argument was a temporary
  // StringPiece (for instance one implicitly converted from a string
  // literal).  StringPiece is itself a non-owning view, so the
  // characters it points at must still outlive this object.
  const StringPiece regexp_str_;    // regexp being tested
  Prog::MatchKind kind_;            // kind of match
  Regexp::ParseFlags flags_;        // flags for parsing regexp_str_
  bool error_;                      // error during constructor?

  Regexp* regexp_;                  // parsed regexp
  int num_captures_;                // regexp_->NumCaptures() cached
  Prog* prog_;                      // compiled program
  Prog* rprog_;                     // compiled reverse program
  PCRE* re_;                        // PCRE implementation
  RE2* re2_;                        // RE2 implementation

  DISALLOW_EVIL_CONSTRUCTORS(TestInstance);
};

// A group of TestInstances for all possible configurations.
class Tester {
 public:
  explicit Tester(const StringPiece& regexp);
  ~Tester();

  bool error() { return error_; }

  // Runs a single test case: search in text, which is in context,
  // using the given anchoring.
  bool TestCase(const StringPiece& text, const StringPiece& context,
                Prog::Anchor anchor);

  // Run TestCase(text, text, anchor) for all anchoring modes.
  bool TestInput(const StringPiece& text);

  // Run TestCase(text, context, anchor) for all anchoring modes.
  bool TestInputInContext(const StringPiece& text, const StringPiece& context);

 private:
  bool error_;
  vector<TestInstance*> v_;

  DISALLOW_EVIL_CONSTRUCTORS(Tester);
};

// Run all possible tests using regexp and text.
// NOTE(review): the bool result presumably reports overall success
// (true == every test passed); confirm against the definition.
bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);

}  // namespace re2

#endif  // RE2_TESTING_TESTER_H__