diff options
Diffstat (limited to 'phonenumbermatcher.h')
-rw-r--r-- | phonenumbermatcher.h | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/phonenumbermatcher.h b/phonenumbermatcher.h new file mode 100644 index 0000000..d9ead8d --- /dev/null +++ b/phonenumbermatcher.h @@ -0,0 +1,195 @@ +// Copyright (C) 2011 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Lara Rennie +// Author: Tao Huang +// +// This is a direct port from PhoneNumberMatcher.java. +// Changes to this class should also happen to the Java version, whenever it +// makes sense. + +#ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ +#define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ + +#include <string> +#include <vector> + +#include "phonenumbers/base/basictypes.h" +#include "phonenumbers/base/memory/scoped_ptr.h" +#include "phonenumbers/callback.h" +#include "phonenumbers/regexp_adapter.h" + +namespace i18n { +namespace phonenumbers { + +template <class R, class A1, class A2, class A3, class A4> + class ResultCallback4; + +using std::string; +using std::vector; + +class AlternateFormats; +class NumberFormat; +class PhoneNumber; +class PhoneNumberMatch; +class PhoneNumberMatcherRegExps; +class PhoneNumberUtil; + +class PhoneNumberMatcher { + friend class PhoneNumberMatcherTest; + public: + // Leniency when finding potential phone numbers in text segments. The levels + // here are ordered in increasing strictness. + enum Leniency { + // Phone numbers accepted are possible, but not necessarily valid. + POSSIBLE, + // Phone numbers accepted are possible and valid. + VALID, + // Phone numbers accepted are valid and are grouped in a possible way for + // this locale. For example, a US number written as "65 02 53 00 00" is not + // accepted at this leniency level, whereas "650 253 0000" or "6502530000" + // are. Numbers with more than one '/' symbol are also dropped at this + // level. + // Warning: The next two levels might result in lower coverage especially + // for regions outside of country code "+1". If you are not sure about which + // level to use, you can send an e-mail to the discussion group + // http://groups.google.com/group/libphonenumber-discuss/ + STRICT_GROUPING, + // Phone numbers accepted are valid and are grouped in the same way that we + // would have formatted it, or as a single block. For example, a US number + // written as "650 2530000" is not accepted at this leniency level, whereas + // "650 253 0000" or "6502530000" are. + EXACT_GROUPING, + }; + + // Constructs a phone number matcher. + PhoneNumberMatcher(const PhoneNumberUtil& util, + const string& text, + const string& region_code, + Leniency leniency, + int max_tries); + + // Wrapper to construct a phone number matcher, with no limitation on the + // number of retries and VALID Leniency. + PhoneNumberMatcher(const string& text, + const string& region_code); + + ~PhoneNumberMatcher(); + + // Returns true if the text sequence has another match. + bool HasNext(); + + // Gets next match from text sequence. + bool Next(PhoneNumberMatch* match); + + private: + // The potential states of a PhoneNumberMatcher. + enum State { + NOT_READY, + READY, + DONE, + }; + + // Attempts to extract a match from a candidate string. Returns true if a + // match is found, otherwise returns false. The value "offset" refers to the + // start index of the candidate string within the overall text. + bool Find(int index, PhoneNumberMatch* match); + + // Checks a number was formatted with a national prefix, if the number was + // found in national format, and a national prefix is required for that + // number. Returns false if the number needed to have a national prefix and + // none was found. + bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const; + + // Attempts to extract a match from candidate. Returns true if the match was + // found, otherwise returns false. + bool ExtractMatch(const string& candidate, int offset, + PhoneNumberMatch* match); + + // Attempts to extract a match from a candidate string if the whole candidate + // does not qualify as a match. Returns true if a match is found, otherwise + // returns false. + bool ExtractInnerMatch(const string& candidate, int offset, + PhoneNumberMatch* match); + + // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and + // verifies it matches the requested leniency. If parsing and verification + // succeed, returns true, otherwise this method returns false; + bool ParseAndVerify(const string& candidate, int offset, + PhoneNumberMatch* match); + + bool CheckNumberGroupingIsValid( + const PhoneNumber& phone_number, + const string& candidate, + ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&, + const string&, const vector<string>&>* checker) const; + + void GetNationalNumberGroups( + const PhoneNumber& number, + const NumberFormat* formatting_pattern, + vector<string>* digit_blocks) const; + + bool AllNumberGroupsAreExactlyPresent( + const PhoneNumberUtil& util, + const PhoneNumber& phone_number, + const string& normalized_candidate, + const vector<string>& formatted_number_groups) const; + + bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number, + const string& candidate) const; + + // Helper method to determine if a character is a Latin-script letter or not. + // For our purposes, combining marks should also return true since we assume + // they have been added to a preceding Latin character. + static bool IsLatinLetter(char32 letter); + + // Helper class holding useful regular expressions. + const PhoneNumberMatcherRegExps* reg_exps_; + + // Helper class holding loaded data containing alternate ways phone numbers + // might be formatted for certain regions. + const AlternateFormats* alternate_formats_; + + // The phone number utility; + const PhoneNumberUtil& phone_util_; + + // The text searched for phone numbers; + const string text_; + + // The region(country) to assume for phone numbers without an international + // prefix. + const string preferred_region_; + + // The degree of validation requested. + Leniency leniency_; + + // The maximum number of retries after matching an invalid number. + int max_tries_; + + // The iteration tristate. + State state_; + + // The last successful match, NULL unless in State.READY. + scoped_ptr<PhoneNumberMatch> last_match_; + + // The next index to start searching at. Undefined in State.DONE. + int search_index_; + + DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher); +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ |