summaryrefslogtreecommitdiff
path: root/regexp_adapter_re2.cc
diff options
context:
space:
mode:
Diffstat (limited to 'regexp_adapter_re2.cc')
-rw-r--r--regexp_adapter_re2.cc161
1 files changed, 161 insertions, 0 deletions
diff --git a/regexp_adapter_re2.cc b/regexp_adapter_re2.cc
new file mode 100644
index 0000000..64a58ed
--- /dev/null
+++ b/regexp_adapter_re2.cc
@@ -0,0 +1,161 @@
+// Copyright (C) 2011 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: George Yakovlev
+// Philippe Liard
+
+#include "phonenumbers/regexp_adapter_re2.h"
+
+#include <cstddef>
+#include <string>
+
+#include <re2/re2.h>
+#include <re2/stringpiece.h>
+
+#include "phonenumbers/base/basictypes.h"
+#include "phonenumbers/base/logging.h"
+#include "phonenumbers/stringutil.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using re2::StringPiece;
+
+// Implementation of RegExpInput abstract class.
+class RE2RegExpInput : public RegExpInput {
+ public:
+ explicit RE2RegExpInput(const string& utf8_input)
+ : string_(utf8_input),
+ utf8_input_(string_) {}
+
+ virtual string ToString() const {
+ return utf8_input_.ToString();
+ }
+
+ StringPiece* Data() {
+ return &utf8_input_;
+ }
+
+ private:
+ // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
+ // copy the string passed in.
+ const string string_;
+ StringPiece utf8_input_;
+};
+
+namespace {
+
+template <typename Function, typename Input>
+bool DispatchRE2Call(Function regex_function,
+ Input input,
+ const RE2& regexp,
+ string* out1,
+ string* out2,
+ string* out3) {
+ if (out3) {
+ return regex_function(input, regexp, out1, out2, out3);
+ }
+ if (out2) {
+ return regex_function(input, regexp, out1, out2);
+ }
+ if (out1) {
+ return regex_function(input, regexp, out1);
+ }
+ return regex_function(input, regexp);
+}
+
+// Replaces unescaped dollar-signs with backslashes. Backslashes are deleted
+// when they escape dollar-signs.
+string TransformRegularExpressionToRE2Syntax(const string& regex) {
+ string re2_regex(regex);
+ if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) {
+ return regex;
+ }
+ // If we replaced a dollar sign with a backslash and there are now two
+ // backslashes in the string, we assume that the dollar-sign was previously
+ // escaped and that we need to retain it. To do this, we replace pairs of
+ // backslashes with a dollar sign.
+ GlobalReplaceSubstring("\\\\", "$", &re2_regex);
+ return re2_regex;
+}
+
+} // namespace
+
+// Implementation of RegExp abstract class.
+class RE2RegExp : public RegExp {
+ public:
+ explicit RE2RegExp(const string& utf8_regexp)
+ : utf8_regexp_(utf8_regexp) {}
+
+ virtual bool Consume(RegExpInput* input_string,
+ bool anchor_at_start,
+ string* matched_string1,
+ string* matched_string2,
+ string* matched_string3) const {
+ DCHECK(input_string);
+ StringPiece* utf8_input =
+ static_cast<RE2RegExpInput*>(input_string)->Data();
+
+ if (anchor_at_start) {
+ return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_,
+ matched_string1, matched_string2,
+ matched_string3);
+ } else {
+ return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_,
+ matched_string1, matched_string2,
+ matched_string3);
+ }
+ }
+
+ virtual bool Match(const string& input_string,
+ bool full_match,
+ string* matched_string) const {
+ if (full_match) {
+ return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_,
+ matched_string, NULL, NULL);
+ } else {
+ return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_,
+ matched_string, NULL, NULL);
+ }
+ }
+
+ virtual bool Replace(string* string_to_process,
+ bool global,
+ const string& replacement_string) const {
+ DCHECK(string_to_process);
+ const string re2_replacement_string =
+ TransformRegularExpressionToRE2Syntax(replacement_string);
+ if (global) {
+ return RE2::GlobalReplace(string_to_process, utf8_regexp_,
+ re2_replacement_string);
+ } else {
+ return RE2::Replace(string_to_process, utf8_regexp_,
+ re2_replacement_string);
+ }
+ }
+
+ private:
+ RE2 utf8_regexp_;
+};
+
+RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
+ return new RE2RegExpInput(utf8_input);
+}
+
+RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
+ return new RE2RegExp(utf8_regexp);
+}
+
+} // namespace phonenumbers
+} // namespace i18n