diff options
Diffstat (limited to 'regexp_adapter_re2.cc')
-rw-r--r-- | regexp_adapter_re2.cc | 161 |
1 files changed, 161 insertions, 0 deletions
diff --git a/regexp_adapter_re2.cc b/regexp_adapter_re2.cc new file mode 100644 index 0000000..64a58ed --- /dev/null +++ b/regexp_adapter_re2.cc @@ -0,0 +1,161 @@ +// Copyright (C) 2011 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: George Yakovlev +// Philippe Liard + +#include "phonenumbers/regexp_adapter_re2.h" + +#include <cstddef> +#include <string> + +#include <re2/re2.h> +#include <re2/stringpiece.h> + +#include "phonenumbers/base/basictypes.h" +#include "phonenumbers/base/logging.h" +#include "phonenumbers/stringutil.h" + +namespace i18n { +namespace phonenumbers { + +using re2::StringPiece; + +// Implementation of RegExpInput abstract class. +class RE2RegExpInput : public RegExpInput { + public: + explicit RE2RegExpInput(const string& utf8_input) + : string_(utf8_input), + utf8_input_(string_) {} + + virtual string ToString() const { + return utf8_input_.ToString(); + } + + StringPiece* Data() { + return &utf8_input_; + } + + private: + // string_ holds the string referenced by utf8_input_ as StringPiece doesn't + // copy the string passed in. + const string string_; + StringPiece utf8_input_; +}; + +namespace { + +template <typename Function, typename Input> +bool DispatchRE2Call(Function regex_function, + Input input, + const RE2& regexp, + string* out1, + string* out2, + string* out3) { + if (out3) { + return regex_function(input, regexp, out1, out2, out3); + } + if (out2) { + return regex_function(input, regexp, out1, out2); + } + if (out1) { + return regex_function(input, regexp, out1); + } + return regex_function(input, regexp); +} + +// Replaces unescaped dollar-signs with backslashes. Backslashes are deleted +// when they escape dollar-signs. +string TransformRegularExpressionToRE2Syntax(const string& regex) { + string re2_regex(regex); + if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) { + return regex; + } + // If we replaced a dollar sign with a backslash and there are now two + // backslashes in the string, we assume that the dollar-sign was previously + // escaped and that we need to retain it. To do this, we replace pairs of + // backslashes with a dollar sign. + GlobalReplaceSubstring("\\\\", "$", &re2_regex); + return re2_regex; +} + +} // namespace + +// Implementation of RegExp abstract class. +class RE2RegExp : public RegExp { + public: + explicit RE2RegExp(const string& utf8_regexp) + : utf8_regexp_(utf8_regexp) {} + + virtual bool Consume(RegExpInput* input_string, + bool anchor_at_start, + string* matched_string1, + string* matched_string2, + string* matched_string3) const { + DCHECK(input_string); + StringPiece* utf8_input = + static_cast<RE2RegExpInput*>(input_string)->Data(); + + if (anchor_at_start) { + return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_, + matched_string1, matched_string2, + matched_string3); + } else { + return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_, + matched_string1, matched_string2, + matched_string3); + } + } + + virtual bool Match(const string& input_string, + bool full_match, + string* matched_string) const { + if (full_match) { + return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_, + matched_string, NULL, NULL); + } else { + return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_, + matched_string, NULL, NULL); + } + } + + virtual bool Replace(string* string_to_process, + bool global, + const string& replacement_string) const { + DCHECK(string_to_process); + const string re2_replacement_string = + TransformRegularExpressionToRE2Syntax(replacement_string); + if (global) { + return RE2::GlobalReplace(string_to_process, utf8_regexp_, + re2_replacement_string); + } else { + return RE2::Replace(string_to_process, utf8_regexp_, + re2_replacement_string); + } + } + + private: + RE2 utf8_regexp_; +}; + +RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const { + return new RE2RegExpInput(utf8_input); +} + +RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const { + return new RE2RegExp(utf8_regexp); +} + +} // namespace phonenumbers +} // namespace i18n |