diff options
Diffstat (limited to 'cpp/src/address_formatter.cc')
-rw-r--r-- | cpp/src/address_formatter.cc | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/cpp/src/address_formatter.cc b/cpp/src/address_formatter.cc new file mode 100644 index 0000000..29f3486 --- /dev/null +++ b/cpp/src/address_formatter.cc @@ -0,0 +1,197 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <libaddressinput/address_formatter.h> + +#include <strings.h> + +#include <algorithm> +#include <cstddef> +#include <string> +#include <vector> + +#include <libaddressinput/address_data.h> +#include <libaddressinput/address_field.h> + +#include "format_element.h" +#include "language.h" +#include "region_data_constants.h" +#include "rule.h" + +namespace i18n { +namespace addressinput { + +namespace { + +const char kCommaSeparator[] = ", "; +const char kSpaceSeparator[] = " "; +const char kArabicCommaSeparator[] = "\xD8\x8C" " "; /* "، " */ + +const char* kLanguagesThatUseSpace[] = { + "th", + "ko" +}; + +const char* kLanguagesThatHaveNoSeparator[] = { + "ja", + "zh" // All Chinese variants. +}; + +// This data is based on CLDR, for languages that are in official use in some +// country, where Arabic is the most likely script tag. +// TODO: Consider supporting variants such as tr-Arab by detecting the script +// code. +const char* kLanguagesThatUseAnArabicComma[] = { + "ar", + "az", + "fa", + "kk", + "ku", + "ky", + "ps", + "tg", + "tk", + "ur", + "uz" +}; + +// Case insensitive matcher for language tags. +struct LanguageMatcher { + LanguageMatcher(const std::string& tag) : tag(tag) {} + std::string tag; + bool operator() (const std::string& s) { + return strcasecmp(tag.c_str(), s.c_str()) == 0; + } +}; + +std::string GetLineSeparatorForLanguage(const std::string& language_tag) { + Language address_language(language_tag); + + // First deal with explicit script tags. + if (address_language.has_latin_script) { + return kCommaSeparator; + } + + // Now guess something appropriate based on the base language. + const std::string& base_language = address_language.base; + if (std::find_if(kLanguagesThatUseSpace, + kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace), + LanguageMatcher(base_language)) != + kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) { + return kSpaceSeparator; + } else if (std::find_if(kLanguagesThatHaveNoSeparator, + kLanguagesThatHaveNoSeparator + + arraysize(kLanguagesThatHaveNoSeparator), + LanguageMatcher(base_language)) != + kLanguagesThatHaveNoSeparator + + arraysize(kLanguagesThatHaveNoSeparator)) { + return ""; + } else if (std::find_if(kLanguagesThatUseAnArabicComma, + kLanguagesThatUseAnArabicComma + + arraysize(kLanguagesThatUseAnArabicComma), + LanguageMatcher(base_language)) != + kLanguagesThatUseAnArabicComma + + arraysize(kLanguagesThatUseAnArabicComma)) { + return kArabicCommaSeparator; + } + // Either the language is a latin-script language, or no language was + // specified. In the latter case we still return ", " as the most common + // separator in use. In countries that don't use this, e.g. Thailand, + // addresses are often written in latin script where this would still be + // appropriate, so this is a reasonable default in the absence of information. + return kCommaSeparator; +} + +void CombineLinesForLanguage( + const std::vector<std::string>& lines, const std::string& language_tag, + std::string *line) { + if (lines.size() > 0) { + line->assign(lines[0]); + } + std::string separator = GetLineSeparatorForLanguage(language_tag); + for (std::vector<std::string>::const_iterator it = lines.begin() + 1; + it < lines.end(); ++it) { + line->append(separator); + line->append(*it); + } +} + +} // namespace + +void GetFormattedNationalAddress( + const AddressData& address_data, std::vector<std::string>* lines) { + assert(lines != NULL); + lines->clear(); + + Rule rule; + rule.CopyFrom(Rule::GetDefault()); + // TODO: Eventually, we should get the best rule for this country and + // language, rather than just for the country. + rule.ParseSerializedRule(RegionDataConstants::GetRegionData( + address_data.region_code)); + + Language language(address_data.language_code); + + // If latinized rules are available and the |language_code| of this address is + // explicitly tagged as being Latin, then use the latinized formatting rules. + const std::vector<FormatElement>& format = + language.has_latin_script && !rule.GetLatinFormat().empty() + ? rule.GetLatinFormat() : rule.GetFormat(); + + std::string line; + for (size_t i = 0; i < format.size(); ++i) { + FormatElement element = format[i]; + if (element.IsNewline()) { + if (!line.empty()) { + lines->push_back(line); + line.clear(); + } + } else if (element.IsField()) { + AddressField field = element.GetField(); + if (field == STREET_ADDRESS) { + // The field "street address" represents the street address lines of an + // address, so there can be multiple values. + if (!line.empty()) { + lines->push_back(line); + line.clear(); + } + lines->insert(lines->end(), address_data.address_line.begin(), + address_data.address_line.end()); + } else { + line.append(address_data.GetFieldValue(field)); + } + } else { + line.append(element.GetLiteral()); + } + } + if (!line.empty()) { + lines->push_back(line); + } +} + +void GetFormattedNationalAddressLine( + const AddressData& address_data, std::string* line) { + std::vector<std::string> address_lines; + GetFormattedNationalAddress(address_data, &address_lines); + CombineLinesForLanguage(address_lines, address_data.language_code, line); +} + +void GetStreetAddressLinesAsSingleLine( + const AddressData& address_data, std::string* line) { + CombineLinesForLanguage( + address_data.address_line, address_data.language_code, line); +} + +} // namespace addressinput +} // namespace i18n |