diff options
-rw-r--r-- | cpp/include/libaddressinput/address_ui.h | 13 | ||||
-rw-r--r-- | cpp/include/libaddressinput/localization.h | 21 | ||||
-rw-r--r-- | cpp/libaddressinput.gyp | 2 | ||||
-rw-r--r-- | cpp/src/address_ui.cc | 64 | ||||
-rw-r--r-- | cpp/src/language.cc | 58 | ||||
-rw-r--r-- | cpp/src/language.h | 44 | ||||
-rw-r--r-- | cpp/src/localization.cc | 16 | ||||
-rw-r--r-- | cpp/src/post_box_matchers.cc | 15 | ||||
-rw-r--r-- | cpp/src/rule.cc | 10 | ||||
-rw-r--r-- | cpp/src/rule.h | 15 | ||||
-rw-r--r-- | cpp/test/address_ui_test.cc | 134 | ||||
-rw-r--r-- | cpp/test/language_test.cc | 62 | ||||
-rw-r--r-- | cpp/test/localization_test.cc | 15 | ||||
-rw-r--r-- | cpp/test/rule_test.cc | 12 |
14 files changed, 440 insertions, 41 deletions
diff --git a/cpp/include/libaddressinput/address_ui.h b/cpp/include/libaddressinput/address_ui.h index 35d2ede..8939501 100644 --- a/cpp/include/libaddressinput/address_ui.h +++ b/cpp/include/libaddressinput/address_ui.h @@ -28,10 +28,19 @@ struct AddressUiComponent; const std::vector<std::string>& GetRegionCodes(); // Returns the UI components for the CLDR |region_code|. Uses the strings from -// |localization|. Returns an empty vector on error. +// |localization|. The components can be in default or Latin order, depending on +// the language of |localization|. +// +// Sets the |best_address_language_tag| to the BCP 47 language tag that should +// be saved with this address. This language will be used to get drop-downs to +// help users fill in their address, and to format the address that the user +// entered. The parameter should not be NULL. +// +// Returns an empty vector on error. std::vector<AddressUiComponent> BuildComponents( const std::string& region_code, - const Localization& localization); + const Localization& localization, + std::string* best_address_language_tag); } // namespace addressinput } // namespace i18n diff --git a/cpp/include/libaddressinput/localization.h b/cpp/include/libaddressinput/localization.h index 6b599c3..77a1917 100644 --- a/cpp/include/libaddressinput/localization.h +++ b/cpp/include/libaddressinput/localization.h @@ -23,12 +23,14 @@ namespace addressinput { // The object to retrieve localized strings based on message IDs. Sample usage: // Localization localization; // localization.SetLanguage("en"); -// Process(BuildComponents("CA", localization)); +// std::string best_language_tag; +// Process(BuildComponents("CA", localization, &best_language_tag)); // // Alternative usage: // Localization localization; -// localization.SetGetter(&MyStringGetter); -// Process(BuildComponents("CA", localization)); +// localization.SetGetter(&MyStringGetter, "fr"); +// std::string best_language_tag; +// Process(BuildComponents("CA", localization, &best_language_tag)); class Localization { public: // Initializes with English messages by default. @@ -41,15 +43,22 @@ class Localization { // Sets the language for the strings. The only supported language is "en" // until we have translations. - void SetLanguage(const std::string& language_code); + void SetLanguage(const std::string& language_tag); // Sets the string getter that takes a message identifier and returns the - // corresponding localized string. - void SetGetter(std::string (*getter)(int)); + // corresponding localized string. The |language_tag| parameter is used only + // for information purposes here. + void SetGetter(std::string (*getter)(int), const std::string& language_tag); + + // Returns the current language tag. + const std::string& GetLanguage() const { return language_tag_; } private: // The string getter. std::string (*get_string_)(int); + + // The current language tag. + std::string language_tag_; }; } // namespace addressinput diff --git a/cpp/libaddressinput.gyp b/cpp/libaddressinput.gyp index e57bda0..b4ab884 100644 --- a/cpp/libaddressinput.gyp +++ b/cpp/libaddressinput.gyp @@ -37,6 +37,7 @@ 'src/address_problem.cc', 'src/address_ui.cc', 'src/address_validator.cc', + 'src/language.cc', 'src/localization.cc', 'src/lookup_key.cc', 'src/lookup_key_util.cc', @@ -75,6 +76,7 @@ 'test/fake_downloader_test.cc', 'test/fake_storage.cc', 'test/fake_storage_test.cc', + 'test/language_test.cc', 'test/localization_test.cc', 'test/lookup_key_test.cc', 'test/lookup_key_util_test.cc', diff --git a/cpp/src/address_ui.cc b/cpp/src/address_ui.cc index 15f3c61..0eb28fc 100644 --- a/cpp/src/address_ui.cc +++ b/cpp/src/address_ui.cc @@ -18,12 +18,15 @@ #include <libaddressinput/address_ui_component.h> #include <libaddressinput/localization.h> +#include <cassert> +#include <cstddef> #include <set> #include <string> #include <vector> #include "address_field_util.h" #include "grit.h" +#include "language.h" #include "messages.h" #include "region_data_constants.h" #include "rule.h" @@ -33,6 +36,41 @@ namespace addressinput { namespace { +Language ChooseBestAddressLanguage( + const std::vector<Language>& available_languages, + bool has_latin_format, + const Language& ui_language) { + if (available_languages.empty()) { + return ui_language; + } + + if (ui_language.tag.empty()) { + return available_languages.front(); + } + + // The conventionally formatted BCP 47 Latin script with a preceding subtag + // separator. + static const char kLatinScriptSuffix[] = "-Latn"; + Language latin_script_language( + available_languages.front().base + kLatinScriptSuffix); + if (has_latin_format && ui_language.has_latin_script) { + return latin_script_language; + } + + for (std::vector<Language>::const_iterator + available_lang_it = available_languages.begin(); + available_lang_it != available_languages.end(); ++available_lang_it) { + // Base language comparison works because no region supports the same base + // language with different scripts, for now. For example, no region supports + // "zh-Hant" and "zh-Hans" at the same time. + if (ui_language.base == available_lang_it->base) { + return *available_lang_it; + } + } + + return has_latin_format ? latin_script_language : available_languages.front(); +} + int GetMessageIdForField(AddressField field, int admin_area_name_message_id, int postal_code_name_message_id) { @@ -71,7 +109,9 @@ const std::vector<std::string>& GetRegionCodes() { std::vector<AddressUiComponent> BuildComponents( const std::string& region_code, - const Localization& localization) { + const Localization& localization, + std::string* best_address_language_tag) { + assert(best_address_language_tag != NULL); std::vector<AddressUiComponent> result; Rule rule; @@ -81,15 +121,31 @@ std::vector<AddressUiComponent> BuildComponents( return result; } + std::vector<Language> available_languages; + for (std::vector<std::string>::const_iterator language_tag_it = + rule.GetLanguages().begin(); + language_tag_it != rule.GetLanguages().end(); ++language_tag_it) { + available_languages.push_back(Language(*language_tag_it)); + } + + const Language& best_address_language = ChooseBestAddressLanguage( + available_languages, !rule.GetLatinFormat().empty(), + Language(localization.GetLanguage())); + *best_address_language_tag = best_address_language.tag; + + const std::vector<AddressField>& format = + !rule.GetLatinFormat().empty() && + best_address_language.has_latin_script + ? rule.GetLatinFormat() : rule.GetFormat(); + // For avoiding showing an input field twice, when the field is displayed // twice on an envelope. std::set<AddressField> fields; bool previous_field_is_newline = true; bool next_field_is_newline = true; - for (std::vector<AddressField>::const_iterator field_it = - rule.GetFormat().begin(); - field_it != rule.GetFormat().end(); ++field_it) { + for (std::vector<AddressField>::const_iterator field_it = format.begin(); + field_it != format.end(); ++field_it) { if (IsNewline(*field_it)) { previous_field_is_newline = true; continue; diff --git a/cpp/src/language.cc b/cpp/src/language.cc new file mode 100644 index 0000000..5c212dd --- /dev/null +++ b/cpp/src/language.cc @@ -0,0 +1,58 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "language.h" + +#include <algorithm> +#include <cctype> +#include <string> +#include <vector> + +#include "util/string_split.h" + +namespace i18n { +namespace addressinput { + +Language::Language(const std::string& language_tag) : tag(language_tag), + base(), + has_latin_script(false) { + // Character '-' is the separator for subtags in the BCP 47. However, some + // legacy code generates tags with '_' instead of '-'. + static const char kSubtagsSeparator = '-'; + static const char kAlternativeSubtagsSeparator = '_'; + std::replace(tag.begin(), tag.end(), kAlternativeSubtagsSeparator, + kSubtagsSeparator); + + // OK to use 'tolower' because BCP 47 tags are always in ASCII. + std::string lowercase = tag; + std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(), + tolower); + + base = lowercase.substr(0, lowercase.find(kSubtagsSeparator)); + + // The lowercase BCP 47 subtag for Latin script. + static const char kLowercaseLatinScript[] = "latn"; + std::vector<std::string> subtags; + SplitString(lowercase, kSubtagsSeparator, &subtags); + + // Support only the second and third position for the script. + has_latin_script = + (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) || + (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript); +} + +Language::~Language() {} + +} // namespace addressinput +} // namespace i18n diff --git a/cpp/src/language.h b/cpp/src/language.h new file mode 100644 index 0000000..f2cc447 --- /dev/null +++ b/cpp/src/language.h @@ -0,0 +1,44 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef I18N_ADDRESSINPUT_LANGUAGE_H_ +#define I18N_ADDRESSINPUT_LANGUAGE_H_ + +#include <string> + +namespace i18n { +namespace addressinput { + +// Helper for working with a BCP 47 language tag. +// http://tools.ietf.org/html/bcp47 +struct Language { + explicit Language(const std::string& language_tag); + ~Language(); + + // The language tag (with '_' replaced with '-'), for example "zh-Latn-CN". + std::string tag; + + // The base language, for example "zh". Always lowercase. + std::string base; + + // True if the language tag explicitly has a Latin script. For example, this + // is true for "zh-Latn", but false for "zh". Only the second and third subtag + // positions are supported for script. + bool has_latin_script; +}; + +} // namespace addressinput +} // namespace i18n + +#endif // I18N_ADDRESSINPUT_LANGUAGE_H_ diff --git a/cpp/src/localization.cc b/cpp/src/localization.cc index 3fd32af..1558b23 100644 --- a/cpp/src/localization.cc +++ b/cpp/src/localization.cc @@ -23,7 +23,9 @@ namespace addressinput { namespace { -// For each language code XX with translations: +static const char kDefaultLanguage[] = "en"; + +// For each language XX with translations: // (1) Add a namespace XX here with an include of "XX_messages.cc". // (2) Add a wrapper that converts the char pointer to std::string. (GRIT // generated functions return char pointers.) @@ -41,7 +43,8 @@ std::string GetStdString(int message_id) { } // namespace -Localization::Localization() : get_string_(&en::GetStdString) {} +Localization::Localization() : get_string_(&en::GetStdString), + language_tag_(kDefaultLanguage) {} Localization::~Localization() {} @@ -49,17 +52,20 @@ std::string Localization::GetString(int message_id) const { return get_string_(message_id); } -void Localization::SetLanguage(const std::string& language_code) { - if (language_code == "en") { +void Localization::SetLanguage(const std::string& language_tag) { + if (language_tag == kDefaultLanguage) { get_string_ = &en::GetStdString; } else { assert(false); } + language_tag_ = language_tag; } -void Localization::SetGetter(std::string (*getter)(int)) { +void Localization::SetGetter(std::string (*getter)(int), + const std::string& language_tag) { assert(getter != NULL); get_string_ = getter; + language_tag_ = language_tag; } } // namespace addressinput diff --git a/cpp/src/post_box_matchers.cc b/cpp/src/post_box_matchers.cc index fd0602a..95ee375 100644 --- a/cpp/src/post_box_matchers.cc +++ b/cpp/src/post_box_matchers.cc @@ -16,14 +16,13 @@ #include "post_box_matchers.h" -#include <algorithm> -#include <cctype> #include <cstddef> #include <map> #include <string> #include <utility> #include <vector> +#include "language.h" #include "rule.h" namespace i18n { @@ -97,15 +96,6 @@ std::map<std::string, const RE2ptr*> InitMatchers() { return matchers; } -std::string GetBaseLanguage(const std::string& language) { - // Be lenient in parsing, allow underscore separators and uppercase letters. - std::string::size_type end = language.find_first_of("-_"); - std::string base( - end == std::string::npos ? language : language.substr(0, end)); - std::transform(base.begin(), base.end(), base.begin(), tolower); - return base; -} - } // namespace // static @@ -118,7 +108,8 @@ std::vector<const RE2ptr*> PostBoxMatchers::GetMatchers( for (std::vector<std::string>::const_iterator it = country_rule.GetLanguages().begin(); it != country_rule.GetLanguages().end(); ++it) { - languages.push_back(GetBaseLanguage(*it)); + Language language(*it); + languages.push_back(language.base); } std::vector<const RE2ptr*> result; diff --git a/cpp/src/rule.cc b/cpp/src/rule.cc index ebd5be5..48ec3dc 100644 --- a/cpp/src/rule.cc +++ b/cpp/src/rule.cc @@ -40,10 +40,11 @@ typedef std::map<std::string, int> NameMessageIdMap; const char kAdminAreaNameTypeKey[] = "state_name_type"; const char kFormatKey[] = "fmt"; const char kIdKey[] = "id"; +const char kLanguagesKey[] = "languages"; +const char kLatinFormatKey[] = "lfmt"; const char kPostalCodeNameTypeKey[] = "zip_name_type"; const char kRequireKey[] = "require"; const char kSubKeysKey[] = "sub_keys"; -const char kLanguagesKey[] = "languages"; const char kZipKey[] = "zip"; // Used as a separator in a list of items. For example, the list of supported @@ -108,6 +109,7 @@ int GetMessageIdFromName(const std::string& name, Rule::Rule() : id_(), format_(), + latin_format_(), required_(), sub_keys_(), languages_(), @@ -132,6 +134,7 @@ const Rule& Rule::GetDefault() { void Rule::CopyFrom(const Rule& rule) { id_ = rule.id_; format_ = rule.format_; + latin_format_ = rule.latin_format_; required_ = rule.required_; sub_keys_ = rule.sub_keys_; languages_ = rule.languages_; @@ -158,6 +161,11 @@ bool Rule::ParseSerializedRule(const std::string& serialized_rule) { ParseAddressFieldsFormat(json.GetStringValueForKey(kFormatKey), &format_); } + if (json.HasStringValueForKey(kLatinFormatKey)) { + ParseAddressFieldsFormat( + json.GetStringValueForKey(kLatinFormatKey), &latin_format_); + } + if (json.HasStringValueForKey(kRequireKey)) { ParseAddressFieldsRequired( json.GetStringValueForKey(kRequireKey), &required_); diff --git a/cpp/src/rule.h b/cpp/src/rule.h index dbba9f1..bd74784 100644 --- a/cpp/src/rule.h +++ b/cpp/src/rule.h @@ -58,10 +58,16 @@ class Rule { // Returns the ID string for this rule. const std::string& GetId() const { return id_; }; - // Returns the address format for this rule. The format can include the - // NEWLINE extension for AddressField enum. + // Returns the approximate address format with the default order of fields. + // The format can include the NEWLINE extension for AddressField enum. const std::vector<AddressField>& GetFormat() const { return format_; } + // Returns the approximate address format with the Latin order of fields. The + // format can include the NEWLINE extension for AddressField enum. + const std::vector<AddressField>& GetLatinFormat() const { + return latin_format_; + } + // Returns the required fields for this rule. const std::vector<AddressField>& GetRequired() const { return required_; } @@ -71,8 +77,8 @@ class Rule { // "CA", "NY", "TX", etc. const std::vector<std::string>& GetSubKeys() const { return sub_keys_; } - // Returns all of the language codes for which this rule has custom rules, for - // example ["de", "fr", "it"]. + // Returns all of the language tags supported by this rule, for example ["de", + // "fr", "it"]. const std::vector<std::string>& GetLanguages() const { return languages_; } // Returns a pointer to a RE2 regular expression object created from the @@ -97,6 +103,7 @@ class Rule { private: std::string id_; std::vector<AddressField> format_; + std::vector<AddressField> latin_format_; std::vector<AddressField> required_; std::vector<std::string> sub_keys_; std::vector<std::string> languages_; diff --git a/cpp/test/address_ui_test.cc b/cpp/test/address_ui_test.cc index b8cb834..b36ea8c 100644 --- a/cpp/test/address_ui_test.cc +++ b/cpp/test/address_ui_test.cc @@ -26,13 +26,16 @@ namespace { +using i18n::addressinput::ADMIN_AREA; using i18n::addressinput::AddressField; using i18n::addressinput::AddressUiComponent; using i18n::addressinput::BuildComponents; using i18n::addressinput::COUNTRY; using i18n::addressinput::GetRegionCodes; using i18n::addressinput::Localization; +using i18n::addressinput::POSTAL_CODE; using i18n::addressinput::RECIPIENT; +using i18n::addressinput::STREET_ADDRESS; // Returns testing::AssertionSuccess if the |components| are valid. Uses // |region_code| in test failure messages. @@ -66,6 +69,7 @@ testing::AssertionResult ComponentsAreValid( class AddressUiTest : public testing::TestWithParam<std::string> { protected: Localization localization_; + std::string best_address_language_tag_; }; // Verifies that a region code consists of two characters, for example "TW". @@ -76,14 +80,15 @@ TEST_P(AddressUiTest, RegionCodeHasTwoCharacters) { // Verifies that BuildComponents() returns valid UI components for a region // code. TEST_P(AddressUiTest, ComponentsAreValid) { - EXPECT_TRUE(ComponentsAreValid(BuildComponents(GetParam(), localization_))); + EXPECT_TRUE(ComponentsAreValid(BuildComponents( + GetParam(), localization_, &best_address_language_tag_))); } // Verifies that BuildComponents() returns at most one input field of each type. TEST_P(AddressUiTest, UniqueFieldTypes) { std::set<AddressField> fields; const std::vector<AddressUiComponent>& components = - BuildComponents(GetParam(), localization_); + BuildComponents(GetParam(), localization_, &best_address_language_tag_); for (std::vector<AddressUiComponent>::const_iterator it = components.begin(); it != components.end(); ++it) { EXPECT_TRUE(fields.insert(it->field).second); @@ -98,7 +103,130 @@ INSTANTIATE_TEST_CASE_P( // Verifies that BuildComponents() returns an empty vector for an invalid region // code. TEST_F(AddressUiTest, InvalidRegionCodeReturnsEmptyVector) { - EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_).empty()); + EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_, + &best_address_language_tag_).empty()); } +// Test data for determining the best language tag and whether the right format +// pattern was used (fmt vs lfmt). +struct LanguageTestCase { + LanguageTestCase(const std::string& region_code, + const std::string& ui_language_tag, + const std::string& expected_best_address_language_tag, + AddressField expected_first_field) + : region_code(region_code), + ui_language_tag(ui_language_tag), + expected_best_address_language_tag(expected_best_address_language_tag), + expected_first_field(expected_first_field) {} + + ~LanguageTestCase() {} + + // The CLDR region code to test. + const std::string region_code; + + // The BCP 47 language tag to test. + const std::string ui_language_tag; + + // The expected value for the best language tag returned by BuildComponents(). + const std::string expected_best_address_language_tag; + + // The first field expected to be returned from BuildComponents(). Useful for + // determining whether the returned format is in Latin or default order. + const AddressField expected_first_field; +}; + +class BestAddressLanguageTagTest + : public testing::TestWithParam<LanguageTestCase> { + protected: + Localization localization_; + std::string best_address_language_tag_; +}; + +std::string GetterStub(int) { return std::string(); } + +TEST_P(BestAddressLanguageTagTest, CorrectBestAddressLanguageTag) { + localization_.SetGetter(&GetterStub, GetParam().ui_language_tag); + const std::vector<AddressUiComponent>& components = BuildComponents( + GetParam().region_code, localization_, &best_address_language_tag_); + EXPECT_EQ(GetParam().expected_best_address_language_tag, + best_address_language_tag_); + ASSERT_FALSE(components.empty()); + EXPECT_EQ(GetParam().expected_first_field, components.front().field); +} + +INSTANTIATE_TEST_CASE_P( + LanguageTestCases, BestAddressLanguageTagTest, + testing::Values( + // Armenia supports hy and has a Latin format. + LanguageTestCase("AM", "", "hy", RECIPIENT), + LanguageTestCase("AM", "hy", "hy", RECIPIENT), + LanguageTestCase("AM", "en", "hy-Latn", RECIPIENT), + + // P.R. China supports zh-Hans and has a Latin format. + LanguageTestCase("CN", "zh-hans", "zh-Hans", POSTAL_CODE), + LanguageTestCase("CN", "zh-hant", "zh-Hans", POSTAL_CODE), + LanguageTestCase("CN", "zh-hans-CN", "zh-Hans", POSTAL_CODE), + LanguageTestCase("CN", "zh", "zh-Hans", POSTAL_CODE), + LanguageTestCase("CN", "ZH_HANS", "zh-Hans", POSTAL_CODE), + LanguageTestCase("CN", "zh-cmn-Hans-CN", "zh-Hans", POSTAL_CODE), + LanguageTestCase("CN", "zh-Latn", "zh-Latn", RECIPIENT), + LanguageTestCase("CN", "zh-latn-CN", "zh-Latn", RECIPIENT), + LanguageTestCase("CN", "en", "zh-Latn", RECIPIENT), + LanguageTestCase("CN", "ja", "zh-Latn", RECIPIENT), + LanguageTestCase("CN", "ko", "zh-Latn", RECIPIENT), + LanguageTestCase("CN", "ZH_LATN", "zh-Latn", RECIPIENT), + // Libaddressinput does not have information about extended language + // subtags, so it uses the zh-Latn language tag for all base languages + // that are not zh, even if it's effectively the same language. + // Mandarin Chinese, Simplified script, as used in China: + LanguageTestCase("CN", "cmn-Hans-CN", "zh-Latn", RECIPIENT), + + // Hong Kong supports zh-Hant and en. It has a Latin format. + LanguageTestCase("HK", "zh", "zh-Hant", ADMIN_AREA), + LanguageTestCase("HK", "zh-hans", "zh-Hant", ADMIN_AREA), + LanguageTestCase("HK", "zh-hant", "zh-Hant", ADMIN_AREA), + LanguageTestCase("HK", "zh-yue-HK", "zh-Hant", ADMIN_AREA), + LanguageTestCase("HK", "en", "en", ADMIN_AREA), + LanguageTestCase("HK", "zh-latn", "zh-Latn", RECIPIENT), + LanguageTestCase("HK", "fr", "zh-Latn", RECIPIENT), + LanguageTestCase("HK", "ja", "zh-Latn", RECIPIENT), + LanguageTestCase("HK", "ko", "zh-Latn", RECIPIENT), + // Libaddressinput does not have information about extended language + // subtags, so it uses the zh-Latn language tag for all base languages + // that are not zh or en, even if it's effectively the same language. + // Cantonese Chinese, as used in Hong Kong: + LanguageTestCase("HK", "yue-HK", "zh-Latn", RECIPIENT), + + // Macao supports zh-Hant and pt. It has a Latin format. + LanguageTestCase("MO", "zh", "zh-Hant", STREET_ADDRESS), + LanguageTestCase("MO", "zh-Hant", "zh-Hant", STREET_ADDRESS), + LanguageTestCase("MO", "pt", "pt", STREET_ADDRESS), + LanguageTestCase("MO", "zh-Latn", "zh-Latn", RECIPIENT), + LanguageTestCase("MO", "en", "zh-Latn", RECIPIENT), + + // Switzerland supports de, fr, and it. + LanguageTestCase("CH", "de", "de", RECIPIENT), + LanguageTestCase("CH", "de-DE", "de", RECIPIENT), + LanguageTestCase("CH", "de-Latn-DE", "de", RECIPIENT), + LanguageTestCase("CH", "fr", "fr", RECIPIENT), + LanguageTestCase("CH", "it", "it", RECIPIENT), + LanguageTestCase("CH", "en", "de", RECIPIENT), + + // Antarctica does not have language information. + LanguageTestCase("AQ", "en", "en", RECIPIENT), + LanguageTestCase("AQ", "fr", "fr", RECIPIENT), + LanguageTestCase("AQ", "es", "es", RECIPIENT), + LanguageTestCase("AQ", "zh-Hans", "zh-Hans", RECIPIENT), + + // Egypt supports ar and has a Latin format. + LanguageTestCase("EG", "ar", "ar", RECIPIENT), + LanguageTestCase("EG", "ar-Arab", "ar", RECIPIENT), + LanguageTestCase("EG", "ar-Latn", "ar-Latn", RECIPIENT), + LanguageTestCase("EG", "fr", "ar-Latn", RECIPIENT), + LanguageTestCase("EG", "fa", "ar-Latn", RECIPIENT), + // Libaddressinput does not have language-to-script mapping, so it uses + // the ar-Latn language tag for all base languages that are not ar, even + // if the script is the same. + LanguageTestCase("EG", "fa-Arab", "ar-Latn", RECIPIENT))); + } // namespace diff --git a/cpp/test/language_test.cc b/cpp/test/language_test.cc new file mode 100644 index 0000000..197459e --- /dev/null +++ b/cpp/test/language_test.cc @@ -0,0 +1,62 @@ +// Copyright (C) 2014 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "language.h" + +#include <string> + +#include <gtest/gtest.h> + +namespace { + +using i18n::addressinput::Language; + +struct LanguageTestCase { + LanguageTestCase(const std::string& input_language_tag, + const std::string& expected_language_tag, + const std::string& expected_base_language, + bool expected_has_latin_script) + : input_language_tag(input_language_tag), + expected_language_tag(expected_language_tag), + expected_base_language(expected_base_language), + expected_has_latin_script(expected_has_latin_script) {} + + ~LanguageTestCase() {} + + const std::string input_language_tag; + const std::string expected_language_tag; + const std::string expected_base_language; + const bool expected_has_latin_script; +}; + +class LanguageTest : public testing::TestWithParam<LanguageTestCase> {}; + +TEST_P(LanguageTest, ExtractedDataIsCorrect) { + Language language(GetParam().input_language_tag); + EXPECT_EQ(GetParam().expected_language_tag, language.tag); + EXPECT_EQ(GetParam().expected_base_language, language.base); + EXPECT_EQ(GetParam().expected_has_latin_script, language.has_latin_script); +} + +INSTANTIATE_TEST_CASE_P( + LanguageTestCases, LanguageTest, + testing::Values( + LanguageTestCase("", "", "", false), + LanguageTestCase("en", "en", "en", false), + LanguageTestCase("zh-Latn-CN", "zh-Latn-CN", "zh", true), + LanguageTestCase("zh-cmn-Latn-CN", "zh-cmn-Latn-CN", "zh", true), + LanguageTestCase("zh-Hans", "zh-Hans", "zh", false), + LanguageTestCase("en_GB", "en-GB", "en", false))); + +} // namespace diff --git a/cpp/test/localization_test.cc b/cpp/test/localization_test.cc index 6c8b3be..334068f 100644 --- a/cpp/test/localization_test.cc +++ b/cpp/test/localization_test.cc @@ -33,11 +33,13 @@ class LocalizationTest : public testing::TestWithParam<int> { }; // Verifies that a custom message getter can be used. -const char kValidMessage[] = "Data"; +static const char kValidMessage[] = "Data"; +static const char kValidLanguageTag[] = "tlh"; std::string GetValidMessage(int message_id) { return kValidMessage; } TEST_P(LocalizationTest, ValidStringGetterCanBeUsed) { - localization_.SetGetter(&GetValidMessage); + localization_.SetGetter(&GetValidMessage, kValidLanguageTag); EXPECT_EQ(kValidMessage, localization_.GetString(GetParam())); + EXPECT_EQ(kValidLanguageTag, localization_.GetLanguage()); } // Verifies that the default language for messages does not have empty strings. @@ -45,8 +47,8 @@ TEST_P(LocalizationTest, DefaultStringIsNotEmpty) { EXPECT_FALSE(localization_.GetString(GetParam()).empty()); } -// Verifies that English is the default language. -TEST_P(LocalizationTest, EnglishIsDefaultLanguage) { +// Verifies that the default string is English. +TEST_P(LocalizationTest, DefaultStringIsEnglish) { std::string default_string = localization_.GetString(GetParam()); localization_.SetLanguage("en"); EXPECT_EQ(default_string, localization_.GetString(GetParam())); @@ -80,4 +82,9 @@ TEST_F(LocalizationTest, InvalidMessageIsEmptyString) { EXPECT_TRUE(localization_.GetString(INVALID_MESSAGE_ID).empty()); } +// Verifies that the default language is English. +TEST_F(LocalizationTest, DefaultLanguageIsEnglish) { + EXPECT_EQ("en", localization_.GetLanguage()); +} + } // namespace diff --git a/cpp/test/rule_test.cc b/cpp/test/rule_test.cc index b9e671a..ca54006 100644 --- a/cpp/test/rule_test.cc +++ b/cpp/test/rule_test.cc @@ -43,6 +43,7 @@ TEST(RuleTest, CopyOverwritesRule) { Rule rule; ASSERT_TRUE(rule.ParseSerializedRule("{" "\"fmt\":\"%S%Z\"," + "\"lfmt\":\"%Z%S\"," "\"id\":\"data/XA\"," "\"lname\":\"Testistan\"," "\"require\":\"AC\"," @@ -55,6 +56,7 @@ TEST(RuleTest, CopyOverwritesRule) { Rule copy; EXPECT_NE(rule.GetFormat(), copy.GetFormat()); + EXPECT_NE(rule.GetLatinFormat(), copy.GetLatinFormat()); EXPECT_NE(rule.GetId(), copy.GetId()); EXPECT_NE(rule.GetRequired(), copy.GetRequired()); EXPECT_NE(rule.GetSubKeys(), copy.GetSubKeys()); @@ -69,6 +71,7 @@ TEST(RuleTest, CopyOverwritesRule) { copy.CopyFrom(rule); EXPECT_EQ(rule.GetFormat(), copy.GetFormat()); + EXPECT_EQ(rule.GetLatinFormat(), copy.GetLatinFormat()); EXPECT_EQ(rule.GetId(), copy.GetId()); EXPECT_EQ(rule.GetRequired(), copy.GetRequired()); EXPECT_EQ(rule.GetSubKeys(), copy.GetSubKeys()); @@ -115,6 +118,15 @@ TEST(RuleTest, ParsesFormatCorrectly) { EXPECT_EQ(expected, rule.GetFormat()); } +TEST(RuleTest, ParsesLatinFormatCorrectly) { + std::vector<AddressField> expected; + expected.push_back(LOCALITY); + expected.push_back(ADMIN_AREA); + Rule rule; + ASSERT_TRUE(rule.ParseSerializedRule("{\"lfmt\":\"%C%S\"}")); + EXPECT_EQ(expected, rule.GetLatinFormat()); +} + TEST(RuleTest, ParsesRequiredCorrectly) { std::vector<AddressField> expected; expected.push_back(STREET_ADDRESS); |