summary | refs | log | tree | commit | diff
path: root/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
diff options
context:
space:
mode:
Diffstat (limited to 'libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java')
-rw-r--r-- libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java 73
1 file changed, 47 insertions, 26 deletions
diff --git a/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java b/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
index 3a6e3a68..0cb73fd1 100644
--- a/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
+++ b/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
@@ -23,7 +23,7 @@ import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
-
+import com.google.i18n.phonenumbers.internal.RegexCache;
import java.lang.Character.UnicodeBlock;
import java.util.Iterator;
import java.util.NoSuchElementException;
@@ -207,6 +207,12 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
/** The next index to start searching at. Undefined in {@link State#DONE}. */
private int searchIndex = 0;
+ // A cache for frequently used country-specific regular expressions. Set to 32 to cover ~2-3
+ // countries being used for the same doc with ~10 patterns for each country. Some pages will have
+ // a lot more countries in use, but typically fewer numbers for each so expanding the cache for
+ // that use-case won't have a lot of benefit.
+ private final RegexCache regexCache = new RegexCache(32);
+
/**
* Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
* new instance.
@@ -413,7 +419,7 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
- if (leniency.verify(number, candidate, phoneUtil)) {
+ if (leniency.verify(number, candidate, phoneUtil, this)) {
// We used parseAndKeepRawInput to create this number, but for now we don't return the extra
// values parsed. TODO: stop clearing all values here and switch all users over
// to using rawInput() rather than the rawString() of PhoneNumberMatch.
@@ -527,46 +533,61 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
/**
* Helper method to get the national-number part of a number, formatted without any national
- * prefix, and return it as a set of digit blocks that would be formatted together.
+ * prefix, and return it as a set of digit blocks that would be formatted together following
+ * standard formatting rules.
+ */
+ private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number) {
+ // This will be in the format +CC-DG1-DG2-DGX;ext=EXT where DG1..DGX represents groups of
+ // digits.
+ String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
+ // We remove the extension part from the formatted string before splitting it into different
+ // groups.
+ int endIndex = rfc3966Format.indexOf(';');
+ if (endIndex < 0) {
+ endIndex = rfc3966Format.length();
+ }
+ // The country-code will have a '-' following it.
+ int startIndex = rfc3966Format.indexOf('-') + 1;
+ return rfc3966Format.substring(startIndex, endIndex).split("-");
+ }
+
+ /**
+ * Helper method to get the national-number part of a number, formatted without any national
+ * prefix, and return it as a set of digit blocks that should be formatted together according to
+ * the formatting pattern passed in.
*/
private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
NumberFormat formattingPattern) {
- if (formattingPattern == null) {
- // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
- String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
- // We remove the extension part from the formatted string before splitting it into different
- // groups.
- int endIndex = rfc3966Format.indexOf(';');
- if (endIndex < 0) {
- endIndex = rfc3966Format.length();
- }
- // The country-code will have a '-' following it.
- int startIndex = rfc3966Format.indexOf('-') + 1;
- return rfc3966Format.substring(startIndex, endIndex).split("-");
- } else {
- // We format the NSN only, and split that according to the separator.
- String nationalSignificantNumber = util.getNationalSignificantNumber(number);
- return util.formatNsnUsingPattern(nationalSignificantNumber,
- formattingPattern, PhoneNumberFormat.RFC3966).split("-");
- }
+ // A format is always provided here, so we format the NSN only, and split that by the separator.
+ String nationalSignificantNumber = util.getNationalSignificantNumber(number);
+ return util.formatNsnUsingPattern(nationalSignificantNumber,
+ formattingPattern, PhoneNumberFormat.RFC3966).split("-");
}
- static boolean checkNumberGroupingIsValid(
+ boolean checkNumberGroupingIsValid(
PhoneNumber number, CharSequence candidate, PhoneNumberUtil util,
NumberGroupingChecker checker) {
- // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
- // and optimise if necessary.
StringBuilder normalizedCandidate =
PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
- String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
+ String[] formattedNumberGroups = getNationalNumberGroups(util, number);
if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
return true;
}
- // If this didn't pass, see if there are any alternate formats, and try them instead.
+ // If this didn't pass, see if there are any alternate formats that match, and try them instead.
PhoneMetadata alternateFormats =
MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
+ String nationalSignificantNumber = util.getNationalSignificantNumber(number);
if (alternateFormats != null) {
for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
+ if (alternateFormat.leadingDigitsPatternSize() > 0) {
+ // There is only one leading digits pattern for alternate formats.
+ Pattern pattern =
+ regexCache.getPatternForRegex(alternateFormat.getLeadingDigitsPattern(0));
+ if (!pattern.matcher(nationalSignificantNumber).lookingAt()) {
+ // Leading digits don't match; try another one.
+ continue;
+ }
+ }
formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
return true;