aboutsummaryrefslogtreecommitdiff
path: root/cpp/src/address_formatter.cc
diff options
context:
space:
mode:
Diffstat (limited to 'cpp/src/address_formatter.cc')
-rw-r--r--cpp/src/address_formatter.cc197
1 files changed, 197 insertions, 0 deletions
diff --git a/cpp/src/address_formatter.cc b/cpp/src/address_formatter.cc
new file mode 100644
index 0000000..29f3486
--- /dev/null
+++ b/cpp/src/address_formatter.cc
@@ -0,0 +1,197 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <libaddressinput/address_formatter.h>
+
+#include <strings.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include <libaddressinput/address_data.h>
+#include <libaddressinput/address_field.h>
+
+#include "format_element.h"
+#include "language.h"
+#include "region_data_constants.h"
+#include "rule.h"
+
+namespace i18n {
+namespace addressinput {
+
+namespace {
+
+// Separators inserted between address lines when they are joined into a
+// single line.
+const char kCommaSeparator[] = ", ";
+const char kSpaceSeparator[] = " ";
+const char kArabicCommaSeparator[] = "\xD8\x8C" " "; /* "، " */
+
+// Base languages whose address lines are joined with a single space.
+// The table is immutable: both the strings and the pointers are const.
+const char* const kLanguagesThatUseSpace[] = {
+  "th",
+  "ko"
+};
+
+// Base languages whose address lines are joined without any separator.
+const char* const kLanguagesThatHaveNoSeparator[] = {
+  "ja",
+  "zh"  // All Chinese variants.
+};
+
+// This data is based on CLDR, for languages that are in official use in some
+// country, where Arabic is the most likely script tag.
+// TODO: Consider supporting variants such as tr-Arab by detecting the script
+// code.
+const char* const kLanguagesThatUseAnArabicComma[] = {
+  "ar",
+  "az",
+  "fa",
+  "kk",
+  "ku",
+  "ky",
+  "ps",
+  "tg",
+  "tk",
+  "ur",
+  "uz"
+};
+
+// Case insensitive matcher for language tags.
+//
+// Unary predicate: holds a copy of one language tag and reports whether the
+// string it is called with equals that tag when compared with strcasecmp().
+struct LanguageMatcher {
+  // |tag| is copied, so the matcher does not depend on the argument's
+  // lifetime. Explicit so that a string is never silently turned into a
+  // matcher.
+  explicit LanguageMatcher(const std::string& tag) : tag(tag) {}
+
+  std::string tag;
+
+  // Returns true if |s| equals the stored tag, ignoring case. Const because
+  // matching does not modify the matcher.
+  bool operator() (const std::string& s) const {
+    return strcasecmp(tag.c_str(), s.c_str()) == 0;
+  }
+};
+
+// Returns true if |base_language| equals, ignoring case, one of the entries of
+// the |languages| table.
+template <size_t N>
+bool IsListedLanguage(const char* const (&languages)[N],
+                      const std::string& base_language) {
+  const char* const* const table_end = languages + N;
+  return std::find_if(languages, table_end, LanguageMatcher(base_language)) !=
+         table_end;
+}
+
+// Returns the separator to put between address lines when an address written
+// in the language |language_tag| is collapsed into a single line.
+std::string GetLineSeparatorForLanguage(const std::string& language_tag) {
+  Language parsed_language(language_tag);
+
+  // An explicit Latin script tag wins over anything the base language would
+  // suggest.
+  if (parsed_language.has_latin_script) {
+    return kCommaSeparator;
+  }
+
+  // Otherwise guess from the base language, checking each table in turn.
+  const std::string& base = parsed_language.base;
+  if (IsListedLanguage(kLanguagesThatUseSpace, base)) {
+    return kSpaceSeparator;
+  }
+  if (IsListedLanguage(kLanguagesThatHaveNoSeparator, base)) {
+    return "";
+  }
+  if (IsListedLanguage(kLanguagesThatUseAnArabicComma, base)) {
+    return kArabicCommaSeparator;
+  }
+
+  // Either the language is a latin-script language, or no language was
+  // specified. In the latter case ", " is still returned as the most common
+  // separator in use. In countries that don't use it, e.g. Thailand, addresses
+  // are often written in latin script where it would still be appropriate, so
+  // it is a reasonable default in the absence of information.
+  return kCommaSeparator;
+}
+
+// Joins |lines| into the single string |line|, inserting between consecutive
+// entries the separator returned by GetLineSeparatorForLanguage() for
+// |language_tag|.
+//
+// |line| is always overwritten: any previous content is discarded, and an
+// empty |lines| produces an empty string.
+void CombineLinesForLanguage(
+    const std::vector<std::string>& lines, const std::string& language_tag,
+    std::string *line) {
+  line->clear();
+  // Nothing to join. Returning here also avoids forming an iterator past
+  // end(), which "begin() + 1" would do on an empty vector.
+  if (lines.empty()) {
+    return;
+  }
+
+  const std::string separator = GetLineSeparatorForLanguage(language_tag);
+  std::vector<std::string>::const_iterator it = lines.begin();
+  line->assign(*it);
+  for (++it; it != lines.end(); ++it) {
+    line->append(separator);
+    line->append(*it);
+  }
+}
+
+} // namespace
+
+namespace {
+
+// Appends |line| to |lines| and empties it, unless |line| is already empty, in
+// which case nothing happens (so no blank lines are emitted).
+void FlushLine(std::string* line, std::vector<std::string>* lines) {
+  if (!line->empty()) {
+    lines->push_back(*line);
+    line->clear();
+  }
+}
+
+}  // namespace
+
+// Formats |address_data| as a list of lines written to |lines|, following the
+// format of the rule for |address_data.region_code|.
+//
+// |lines| must not be NULL; its previous content is discarded. Format
+// elements are handled in order: a newline element ends the current line, a
+// field element appends that field's value, and any other element appends its
+// literal text. The STREET_ADDRESS field ends the current line and then adds
+// every entry of |address_data.address_line| as a line of its own.
+void GetFormattedNationalAddress(
+    const AddressData& address_data, std::vector<std::string>* lines) {
+  assert(lines != NULL);
+  lines->clear();
+
+  // Start from the default rule, then parse the region's serialized rule on
+  // top of it.
+  // NOTE(review): the result of ParseSerializedRule() is not checked;
+  // presumably the defaults remain in effect when parsing fails - confirm.
+  Rule rule;
+  rule.CopyFrom(Rule::GetDefault());
+  // TODO: Eventually, we should get the best rule for this country and
+  // language, rather than just for the country.
+  rule.ParseSerializedRule(RegionDataConstants::GetRegionData(
+      address_data.region_code));
+
+  Language language(address_data.language_code);
+
+  // If latinized rules are available and the |language_code| of this address is
+  // explicitly tagged as being Latin, then use the latinized formatting rules.
+  const std::vector<FormatElement>& format =
+      language.has_latin_script && !rule.GetLatinFormat().empty()
+          ? rule.GetLatinFormat() : rule.GetFormat();
+
+  std::string line;
+  for (size_t i = 0; i < format.size(); ++i) {
+    // Bound by reference: the element is only inspected, so copying it (and
+    // the literal it carries) on every iteration is unnecessary.
+    const FormatElement& element = format[i];
+    if (element.IsNewline()) {
+      FlushLine(&line, lines);
+    } else if (element.IsField()) {
+      const AddressField field = element.GetField();
+      if (field == STREET_ADDRESS) {
+        // The field "street address" represents the street address lines of an
+        // address, so there can be multiple values. Whatever has been
+        // accumulated so far becomes its own line first.
+        FlushLine(&line, lines);
+        lines->insert(lines->end(), address_data.address_line.begin(),
+                      address_data.address_line.end());
+      } else {
+        line.append(address_data.GetFieldValue(field));
+      }
+    } else {
+      line.append(element.GetLiteral());
+    }
+  }
+  // Emit the trailing line if the format did not end with a newline element.
+  FlushLine(&line, lines);
+}
+
+// Formats |address_data| with GetFormattedNationalAddress() and writes the
+// resulting lines to |line| as one string, joined with the separator chosen
+// for |address_data.language_code|.
+void GetFormattedNationalAddressLine(
+    const AddressData& address_data, std::string* line) {
+  std::vector<std::string> formatted_lines;
+  GetFormattedNationalAddress(address_data, &formatted_lines);
+
+  const std::string& language_tag = address_data.language_code;
+  CombineLinesForLanguage(formatted_lines, language_tag, line);
+}
+
+// Writes the street address lines of |address_data| to |line| as one string,
+// joined with the separator chosen for |address_data.language_code|.
+void GetStreetAddressLinesAsSingleLine(
+    const AddressData& address_data, std::string* line) {
+  const std::vector<std::string>& street_lines = address_data.address_line;
+  const std::string& language_tag = address_data.language_code;
+  CombineLinesForLanguage(street_lines, language_tag, line);
+}
+
+} // namespace addressinput
+} // namespace i18n