// Copyright (C) 2014 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include #include #include #include #include #include "format_element.h" #include "language.h" #include "region_data_constants.h" #include "rule.h" #include "util/cctype_tolower_equal.h" namespace i18n { namespace addressinput { namespace { const char kCommaSeparator[] = ", "; const char kSpaceSeparator[] = " "; const char kArabicCommaSeparator[] = "\xD8\x8C" " "; /* "، " */ const char* kLanguagesThatUseSpace[] = { "th", "ko" }; const char* kLanguagesThatHaveNoSeparator[] = { "ja", "zh" // All Chinese variants. }; // This data is based on CLDR, for languages that are in official use in some // country, where Arabic is the most likely script tag. // TODO: Consider supporting variants such as tr-Arab by detecting the script // code. const char* kLanguagesThatUseAnArabicComma[] = { "ar", "az", "fa", "kk", "ku", "ky", "ps", "tg", "tk", "ur", "uz" }; std::string GetLineSeparatorForLanguage(const std::string& language_tag) { Language address_language(language_tag); // First deal with explicit script tags. if (address_language.has_latin_script) { return kCommaSeparator; } // Now guess something appropriate based on the base language. const std::string& base_language = address_language.base; if (std::find_if(kLanguagesThatUseSpace, kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace), std::bind2nd(EqualToTolowerString(), base_language)) != kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) { return kSpaceSeparator; } else if (std::find_if( kLanguagesThatHaveNoSeparator, kLanguagesThatHaveNoSeparator + arraysize(kLanguagesThatHaveNoSeparator), std::bind2nd(EqualToTolowerString(), base_language)) != kLanguagesThatHaveNoSeparator + arraysize(kLanguagesThatHaveNoSeparator)) { return ""; } else if (std::find_if( kLanguagesThatUseAnArabicComma, kLanguagesThatUseAnArabicComma + arraysize(kLanguagesThatUseAnArabicComma), std::bind2nd(EqualToTolowerString(), base_language)) != kLanguagesThatUseAnArabicComma + arraysize(kLanguagesThatUseAnArabicComma)) { return kArabicCommaSeparator; } // Either the language is a latin-script language, or no language was // specified. In the latter case we still return ", " as the most common // separator in use. In countries that don't use this, e.g. Thailand, // addresses are often written in latin script where this would still be // appropriate, so this is a reasonable default in the absence of information. return kCommaSeparator; } void CombineLinesForLanguage( const std::vector& lines, const std::string& language_tag, std::string *line) { if (lines.size() > 0) { line->assign(lines[0]); } std::string separator = GetLineSeparatorForLanguage(language_tag); for (std::vector::const_iterator it = lines.begin() + 1; it < lines.end(); ++it) { line->append(separator); line->append(*it); } } } // namespace void GetFormattedNationalAddress( const AddressData& address_data, std::vector* lines) { assert(lines != NULL); lines->clear(); Rule rule; rule.CopyFrom(Rule::GetDefault()); // TODO: Eventually, we should get the best rule for this country and // language, rather than just for the country. rule.ParseSerializedRule(RegionDataConstants::GetRegionData( address_data.region_code)); Language language(address_data.language_code); // If latinized rules are available and the |language_code| of this address is // explicitly tagged as being Latin, then use the latinized formatting rules. const std::vector& format = language.has_latin_script && !rule.GetLatinFormat().empty() ? rule.GetLatinFormat() : rule.GetFormat(); std::string line; for (size_t i = 0; i < format.size(); ++i) { FormatElement element = format[i]; if (element.IsNewline()) { if (!line.empty()) { lines->push_back(line); line.clear(); } } else if (element.IsField()) { AddressField field = element.GetField(); if (field == STREET_ADDRESS) { // The field "street address" represents the street address lines of an // address, so there can be multiple values. if (!line.empty()) { lines->push_back(line); line.clear(); } lines->insert(lines->end(), address_data.address_line.begin(), address_data.address_line.end()); } else { line.append(address_data.GetFieldValue(field)); } } else { line.append(element.GetLiteral()); } } if (!line.empty()) { lines->push_back(line); } } void GetFormattedNationalAddressLine( const AddressData& address_data, std::string* line) { std::vector address_lines; GetFormattedNationalAddress(address_data, &address_lines); CombineLinesForLanguage(address_lines, address_data.language_code, line); } void GetStreetAddressLinesAsSingleLine( const AddressData& address_data, std::string* line) { CombineLinesForLanguage( address_data.address_line, address_data.language_code, line); } } // namespace addressinput } // namespace i18n