// Copyright (C) 2011 The Libphonenumber Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Author: George Yakovlev // Philippe Liard #include "phonenumbers/regexp_adapter_re2.h" #include #include #include #include #include "phonenumbers/base/basictypes.h" #include "phonenumbers/base/logging.h" #include "phonenumbers/stringutil.h" namespace i18n { namespace phonenumbers { using re2::StringPiece; // Implementation of RegExpInput abstract class. class RE2RegExpInput : public RegExpInput { public: explicit RE2RegExpInput(const string& utf8_input) : string_(utf8_input), utf8_input_(string_) {} virtual string ToString() const { return utf8_input_.ToString(); } StringPiece* Data() { return &utf8_input_; } private: // string_ holds the string referenced by utf8_input_ as StringPiece doesn't // copy the string passed in. const string string_; StringPiece utf8_input_; }; namespace { template bool DispatchRE2Call(Function regex_function, Input input, const RE2& regexp, string* out1, string* out2, string* out3) { if (out3) { return regex_function(input, regexp, out1, out2, out3); } if (out2) { return regex_function(input, regexp, out1, out2); } if (out1) { return regex_function(input, regexp, out1); } return regex_function(input, regexp); } // Replaces unescaped dollar-signs with backslashes. Backslashes are deleted // when they escape dollar-signs. string TransformRegularExpressionToRE2Syntax(const string& regex) { string re2_regex(regex); if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) { return regex; } // If we replaced a dollar sign with a backslash and there are now two // backslashes in the string, we assume that the dollar-sign was previously // escaped and that we need to retain it. To do this, we replace pairs of // backslashes with a dollar sign. GlobalReplaceSubstring("\\\\", "$", &re2_regex); return re2_regex; } } // namespace // Implementation of RegExp abstract class. class RE2RegExp : public RegExp { public: explicit RE2RegExp(const string& utf8_regexp) : utf8_regexp_(utf8_regexp) {} virtual bool Consume(RegExpInput* input_string, bool anchor_at_start, string* matched_string1, string* matched_string2, string* matched_string3) const { DCHECK(input_string); StringPiece* utf8_input = static_cast(input_string)->Data(); if (anchor_at_start) { return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_, matched_string1, matched_string2, matched_string3); } else { return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_, matched_string1, matched_string2, matched_string3); } } virtual bool Match(const string& input_string, bool full_match, string* matched_string) const { if (full_match) { return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_, matched_string, NULL, NULL); } else { return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_, matched_string, NULL, NULL); } } virtual bool Replace(string* string_to_process, bool global, const string& replacement_string) const { DCHECK(string_to_process); const string re2_replacement_string = TransformRegularExpressionToRE2Syntax(replacement_string); if (global) { return RE2::GlobalReplace(string_to_process, utf8_regexp_, re2_replacement_string); } else { return RE2::Replace(string_to_process, utf8_regexp_, re2_replacement_string); } } private: RE2 utf8_regexp_; }; RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const { return new RE2RegExpInput(utf8_input); } RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const { return new RE2RegExp(utf8_regexp); } } // namespace phonenumbers } // namespace i18n