aboutsummaryrefslogtreecommitdiff
path: root/cpp/src/post_box_matchers.cc
diff options
context:
space:
mode:
authorroubert@google.com <roubert@google.com@38ededc0-08b8-5190-f2ac-b31f878777ad>2014-03-21 12:22:07 +0000
committerroubert@google.com <roubert@google.com@38ededc0-08b8-5190-f2ac-b31f878777ad>2014-03-21 12:22:07 +0000
commitc3c546166647eeda301711d0ee83d4f7187f3a9c (patch)
treec6612d70ea8bdf37d083c1a8bd6c60961a4eceac /cpp/src/post_box_matchers.cc
parentaa04d02df8665c81b05c057ddfd22d6ef8606405 (diff)
downloadsrc-c3c546166647eeda301711d0ee83d4f7187f3a9c.tar.gz
Add the PostBoxMatchers helper class.
This class contains regular expressions to match post office boxes, and a function to get the appropriate list of these to use for a particular country. git-svn-id: http://libaddressinput.googlecode.com/svn/trunk@201 38ededc0-08b8-5190-f2ac-b31f878777ad
Diffstat (limited to 'cpp/src/post_box_matchers.cc')
-rw-r--r--cpp/src/post_box_matchers.cc140
1 files changed, 140 insertions, 0 deletions
diff --git a/cpp/src/post_box_matchers.cc b/cpp/src/post_box_matchers.cc
new file mode 100644
index 0000000..fd0602a
--- /dev/null
+++ b/cpp/src/post_box_matchers.cc
@@ -0,0 +1,140 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "util/re2ptr.h" // Must be the first #include statement!
+
+#include "post_box_matchers.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "rule.h"
+
+namespace i18n {
+namespace addressinput {
+
+namespace {
+
+// Builds the lookup table from a base language code (e.g. "de") to the
+// regular expression that recognises post office box phrases written in that
+// language. The "und" entry holds the language-independent default.
+//
+// The expressions live in a function-local static array, so the pointers
+// stored in the returned map stay valid after this function returns.
+//
+// Patterns containing non-ASCII text are written as UTF-8 byte escapes; the
+// comment preceding each of them shows the same pattern in readable form.
+std::map<std::string, const RE2ptr*> InitMatchers() {
+  typedef std::map<std::string, const RE2ptr*> MatcherMap;
+
+  static const struct {
+    const char* const language;
+    const RE2ptr ptr;
+  } kEntries[] = {
+    { "ar",
+      /* "صندوق بريد|ص[-. ]ب" */
+      new RE2("\xD8\xB5\xD9\x86\xD8\xAF\xD9\x88\xD9\x82 "
+              "\xD8\xA8\xD8\xB1\xD9\x8A\xD8\xAF|\xD8\xB5[-. ]\xD8\xA8") },
+
+    { "cs", new RE2("(?i)p\\.? ?p\\.? \\d") },
+    { "da", new RE2("(?i)Postboks") },
+    { "de", new RE2("(?i)Postfach") },
+
+    { "el",
+      /* "T\\.? ?Θ\\.? \\d{2}" */
+      new RE2("(?i)T\\.? ?\xCE\x98\\.? \\d{2}") },
+
+    { "en", new RE2("Private Bag|Post(?:al)? Box") },
+    { "es", new RE2("(?i)(?:Apartado|Casillas) de correos?") },
+    { "fi", new RE2("(?i)Postilokero|P\\.?L\\.? \\d") },
+    { "hr", new RE2("(?i)p\\.? ?p\\.? \\d") },
+
+    { "hu",
+      /* "Postafi(?:[oó]|ó)k|Pf\\.? \\d" -- the second alternative is 'o'
+         followed by U+0301 COMBINING ACUTE ACCENT. */
+      new RE2("(?i)Postafi(?:[o\xC3\xB3]|o\xCC\x81)k|Pf\\.? \\d") },
+
+    { "fr",
+      /* "Bo(?:[iî]|î)te Postale|BP \\d|CEDEX \\d" -- the second alternative
+         is 'i' followed by U+0302 COMBINING CIRCUMFLEX ACCENT. */
+      new RE2("(?i)Bo(?:[i\xC3\xAE]|i\xCC\x82)te Postale|BP \\d|CEDEX \\d") },
+
+    { "ja",
+      /* "私書箱\\d{1,5}号" */
+      new RE2("(?i)\xE7\xA7\x81\xE6\x9B\xB8\xE7\xAE\xB1\\d{1,5}\xE5\x8F\xB7") },
+
+    { "nl", new RE2("(?i)Postbus") },
+    { "no", new RE2("(?i)Postboks") },
+    { "pl", new RE2("(?i)Skr(?:\\.?|ytka) poczt(?:\\.?|owa)") },
+    { "pt", new RE2("(?i)Apartado") },
+
+    { "ru",
+      /* "абонентский ящик|[аa]\\\"я (?:(?:№|#|N) ?)?\\d" -- the character
+         class holds both the Cyrillic and the Latin letter 'a'; the adjacent
+         string literals keep the Latin 'a' out of the preceding \x escape. */
+      new RE2("(?i)\xD0\xB0\xD0\xB1\xD0\xBE\xD0\xBD\xD0\xB5\xD0\xBD\xD1\x82\xD1"
+              "\x81\xD0\xBA\xD0\xB8\xD0\xB9 \xD1\x8F\xD1\x89\xD0\xB8\xD0\xBA|"
+              "[\xD0\xB0""a]\\\"\xD1\x8F (?:(?:\xE2\x84\x96|#|N) ?)?\\d") },
+
+    { "sv", new RE2("(?i)Box \\d") },
+
+    { "zh",
+      /* "郵政信箱.{1,5}號|郵局第.{1,10}號信箱" */
+      new RE2("(?i)\xE9\x83\xB5\xE6\x94\xBF\xE4\xBF\xA1\xE7\xAE\xB1.{1,5}"
+              "\xE8\x99\x9F|\xE9\x83\xB5\xE5\xB1\x80\xE7\xAC\xAC.{1,10}"
+              "\xE8\x99\x9F\xE4\xBF\xA1\xE7\xAE\xB1") },
+
+    { "und", new RE2("P\\.? ?O\\.? Box") }
+  };
+  const size_t kEntryCount = sizeof kEntries / sizeof kEntries[0];
+
+  // Index the table by language code. Only the address of each expression is
+  // stored; the array above remains the owner.
+  MatcherMap by_language;
+  for (size_t index = 0; index != kEntryCount; ++index) {
+    const MatcherMap::value_type entry(kEntries[index].language,
+                                       &kEntries[index].ptr);
+    by_language.insert(entry);
+  }
+  return by_language;
+}
+
+// Lowercases a single character with std::tolower. The value is converted to
+// unsigned char first because handing std::tolower a negative char (any
+// non-ASCII byte on platforms where char is signed) is undefined behaviour.
+char ToLowerChar(char c) {
+  return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
+}
+
+// Returns the base language of the language tag |language|, in lowercase:
+// everything before the first '-' or '_', or the whole string when neither
+// separator occurs.
+//
+// Parsing is deliberately lenient: underscore separators and uppercase letters
+// are accepted, so "EN_us" and "en-US" both yield "en".
+std::string GetBaseLanguage(const std::string& language) {
+  // substr() clamps a count of npos to the end of the string, so the
+  // "no separator found" case needs no special handling.
+  std::string base(language.substr(0, language.find_first_of("-_")));
+  std::transform(base.begin(), base.end(), base.begin(), ToLowerChar);
+  return base;
+}
+
+} // namespace
+
+// static
+//
+// Returns the post box expressions to use for |country_rule|.
+//
+// The result starts with the expression registered under "und" and continues
+// with, for each language of |country_rule| in order, the expression for that
+// language's base code. Languages without an entry are skipped; a base
+// language reached through several tags contributes its expression each time.
+// The pointers come from a table that InitMatchers() builds on first use.
+std::vector<const RE2ptr*> PostBoxMatchers::GetMatchers(
+    const Rule& country_rule) {
+  typedef std::map<std::string, const RE2ptr*> MatcherMap;
+  static const MatcherMap kMatchers(InitMatchers());
+
+  std::vector<const RE2ptr*> result;
+
+  // Always add any expressions defined for "und" (English-like defaults).
+  MatcherMap::const_iterator found = kMatchers.find("und");
+  if (found != kMatchers.end()) {
+    result.push_back(found->second);
+  }
+
+  // Then add whatever is defined for each of the country's own languages,
+  // looked up by base language so that regional variants share one entry.
+  const std::vector<std::string>& languages = country_rule.GetLanguages();
+  for (size_t i = 0; i < languages.size(); ++i) {
+    found = kMatchers.find(GetBaseLanguage(languages[i]));
+    if (found != kMatchers.end()) {
+      result.push_back(found->second);
+    }
+  }
+
+  return result;
+}
+
+} // namespace addressinput
+} // namespace i18n