aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- cpp/include/libaddressinput/address_ui.h 13
-rw-r--r-- cpp/include/libaddressinput/localization.h 21
-rw-r--r-- cpp/libaddressinput.gyp 2
-rw-r--r-- cpp/src/address_ui.cc 64
-rw-r--r-- cpp/src/language.cc 58
-rw-r--r-- cpp/src/language.h 44
-rw-r--r-- cpp/src/localization.cc 16
-rw-r--r-- cpp/src/post_box_matchers.cc 15
-rw-r--r-- cpp/src/rule.cc 10
-rw-r--r-- cpp/src/rule.h 15
-rw-r--r-- cpp/test/address_ui_test.cc 134
-rw-r--r-- cpp/test/language_test.cc 62
-rw-r--r-- cpp/test/localization_test.cc 15
-rw-r--r-- cpp/test/rule_test.cc 12
14 files changed, 440 insertions, 41 deletions
diff --git a/cpp/include/libaddressinput/address_ui.h b/cpp/include/libaddressinput/address_ui.h
index 35d2ede..8939501 100644
--- a/cpp/include/libaddressinput/address_ui.h
+++ b/cpp/include/libaddressinput/address_ui.h
@@ -28,10 +28,19 @@ struct AddressUiComponent;
const std::vector<std::string>& GetRegionCodes();
// Returns the UI components for the CLDR |region_code|. Uses the strings from
-// |localization|. Returns an empty vector on error.
+// |localization|. The components can be in default or Latin order, depending on
+// the language of |localization|.
+//
+// Sets the |best_address_language_tag| to the BCP 47 language tag that should
+// be saved with this address. This language will be used to get drop-downs to
+// help users fill in their address, and to format the address that the user
+// entered. The parameter should not be NULL.
+//
+// Returns an empty vector on error.
std::vector<AddressUiComponent> BuildComponents(
const std::string& region_code,
- const Localization& localization);
+ const Localization& localization,
+ std::string* best_address_language_tag);
} // namespace addressinput
} // namespace i18n
diff --git a/cpp/include/libaddressinput/localization.h b/cpp/include/libaddressinput/localization.h
index 6b599c3..77a1917 100644
--- a/cpp/include/libaddressinput/localization.h
+++ b/cpp/include/libaddressinput/localization.h
@@ -23,12 +23,14 @@ namespace addressinput {
// The object to retrieve localized strings based on message IDs. Sample usage:
// Localization localization;
// localization.SetLanguage("en");
-// Process(BuildComponents("CA", localization));
+// std::string best_language_tag;
+// Process(BuildComponents("CA", localization, &best_language_tag));
//
// Alternative usage:
// Localization localization;
-// localization.SetGetter(&MyStringGetter);
-// Process(BuildComponents("CA", localization));
+// localization.SetGetter(&MyStringGetter, "fr");
+// std::string best_language_tag;
+// Process(BuildComponents("CA", localization, &best_language_tag));
class Localization {
public:
// Initializes with English messages by default.
@@ -41,15 +43,22 @@ class Localization {
// Sets the language for the strings. The only supported language is "en"
// until we have translations.
- void SetLanguage(const std::string& language_code);
+ void SetLanguage(const std::string& language_tag);
// Sets the string getter that takes a message identifier and returns the
- // corresponding localized string.
- void SetGetter(std::string (*getter)(int));
+ // corresponding localized string. The |language_tag| does not affect string
+ // lookup; it is only recorded and returned by GetLanguage().
+ void SetGetter(std::string (*getter)(int), const std::string& language_tag);
+
+ // Returns the current language tag.
+ const std::string& GetLanguage() const { return language_tag_; }
private:
// The string getter.
std::string (*get_string_)(int);
+
+ // The current language tag.
+ std::string language_tag_;
};
} // namespace addressinput
diff --git a/cpp/libaddressinput.gyp b/cpp/libaddressinput.gyp
index e57bda0..b4ab884 100644
--- a/cpp/libaddressinput.gyp
+++ b/cpp/libaddressinput.gyp
@@ -37,6 +37,7 @@
'src/address_problem.cc',
'src/address_ui.cc',
'src/address_validator.cc',
+ 'src/language.cc',
'src/localization.cc',
'src/lookup_key.cc',
'src/lookup_key_util.cc',
@@ -75,6 +76,7 @@
'test/fake_downloader_test.cc',
'test/fake_storage.cc',
'test/fake_storage_test.cc',
+ 'test/language_test.cc',
'test/localization_test.cc',
'test/lookup_key_test.cc',
'test/lookup_key_util_test.cc',
diff --git a/cpp/src/address_ui.cc b/cpp/src/address_ui.cc
index 15f3c61..0eb28fc 100644
--- a/cpp/src/address_ui.cc
+++ b/cpp/src/address_ui.cc
@@ -18,12 +18,15 @@
#include <libaddressinput/address_ui_component.h>
#include <libaddressinput/localization.h>
+#include <cassert>
+#include <cstddef>
#include <set>
#include <string>
#include <vector>
#include "address_field_util.h"
#include "grit.h"
+#include "language.h"
#include "messages.h"
#include "region_data_constants.h"
#include "rule.h"
@@ -33,6 +36,41 @@ namespace addressinput {
namespace {
+Language ChooseBestAddressLanguage(
+ const std::vector<Language>& available_languages,
+ bool has_latin_format,
+ const Language& ui_language) {
+ if (available_languages.empty()) {
+ return ui_language;
+ }
+
+ if (ui_language.tag.empty()) {
+ return available_languages.front();
+ }
+
+ // The conventionally formatted BCP 47 Latin script subtag, with a preceding
+ // subtag separator.
+ static const char kLatinScriptSuffix[] = "-Latn";
+ Language latin_script_language(
+ available_languages.front().base + kLatinScriptSuffix);
+ if (has_latin_format && ui_language.has_latin_script) {
+ return latin_script_language;
+ }
+
+ for (std::vector<Language>::const_iterator
+ available_lang_it = available_languages.begin();
+ available_lang_it != available_languages.end(); ++available_lang_it) {
+ // Base language comparison works because no region supports the same base
+ // language with different scripts, for now. For example, no region supports
+ // "zh-Hant" and "zh-Hans" at the same time.
+ if (ui_language.base == available_lang_it->base) {
+ return *available_lang_it;
+ }
+ }
+
+ return has_latin_format ? latin_script_language : available_languages.front();
+}
+
int GetMessageIdForField(AddressField field,
int admin_area_name_message_id,
int postal_code_name_message_id) {
@@ -71,7 +109,9 @@ const std::vector<std::string>& GetRegionCodes() {
std::vector<AddressUiComponent> BuildComponents(
const std::string& region_code,
- const Localization& localization) {
+ const Localization& localization,
+ std::string* best_address_language_tag) {
+ assert(best_address_language_tag != NULL);
std::vector<AddressUiComponent> result;
Rule rule;
@@ -81,15 +121,31 @@ std::vector<AddressUiComponent> BuildComponents(
return result;
}
+ std::vector<Language> available_languages;
+ for (std::vector<std::string>::const_iterator language_tag_it =
+ rule.GetLanguages().begin();
+ language_tag_it != rule.GetLanguages().end(); ++language_tag_it) {
+ available_languages.push_back(Language(*language_tag_it));
+ }
+
+ const Language& best_address_language = ChooseBestAddressLanguage(
+ available_languages, !rule.GetLatinFormat().empty(),
+ Language(localization.GetLanguage()));
+ *best_address_language_tag = best_address_language.tag;
+
+ const std::vector<AddressField>& format =
+ !rule.GetLatinFormat().empty() &&
+ best_address_language.has_latin_script
+ ? rule.GetLatinFormat() : rule.GetFormat();
+
// For avoiding showing an input field twice, when the field is displayed
// twice on an envelope.
std::set<AddressField> fields;
bool previous_field_is_newline = true;
bool next_field_is_newline = true;
- for (std::vector<AddressField>::const_iterator field_it =
- rule.GetFormat().begin();
- field_it != rule.GetFormat().end(); ++field_it) {
+ for (std::vector<AddressField>::const_iterator field_it = format.begin();
+ field_it != format.end(); ++field_it) {
if (IsNewline(*field_it)) {
previous_field_is_newline = true;
continue;
diff --git a/cpp/src/language.cc b/cpp/src/language.cc
new file mode 100644
index 0000000..5c212dd
--- /dev/null
+++ b/cpp/src/language.cc
@@ -0,0 +1,58 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "language.h"
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+#include <vector>
+
+#include "util/string_split.h"
+
+namespace i18n {
+namespace addressinput {
+
+Language::Language(const std::string& language_tag) : tag(language_tag),
+ base(),
+ has_latin_script(false) {
+ // Character '-' is the separator for subtags in BCP 47. However, some
+ // legacy code generates tags with '_' instead of '-'.
+ static const char kSubtagsSeparator = '-';
+ static const char kAlternativeSubtagsSeparator = '_';
+ std::replace(tag.begin(), tag.end(), kAlternativeSubtagsSeparator,
+ kSubtagsSeparator);
+
+ // OK to use 'tolower' because BCP 47 tags are always in ASCII.
+ std::string lowercase = tag;
+ std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(),
+ tolower);
+
+ base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));
+
+ // The lowercase BCP 47 subtag for Latin script.
+ static const char kLowercaseLatinScript[] = "latn";
+ std::vector<std::string> subtags;
+ SplitString(lowercase, kSubtagsSeparator, &subtags);
+
+ // Support only the second and third positions for the script.
+ has_latin_script =
+ (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
+ (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
+}
+
+Language::~Language() {}
+
+} // namespace addressinput
+} // namespace i18n
diff --git a/cpp/src/language.h b/cpp/src/language.h
new file mode 100644
index 0000000..f2cc447
--- /dev/null
+++ b/cpp/src/language.h
@@ -0,0 +1,44 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef I18N_ADDRESSINPUT_LANGUAGE_H_
+#define I18N_ADDRESSINPUT_LANGUAGE_H_
+
+#include <string>
+
+namespace i18n {
+namespace addressinput {
+
+// Helper for working with a BCP 47 language tag.
+// http://tools.ietf.org/html/bcp47
+struct Language {
+ explicit Language(const std::string& language_tag);
+ ~Language();
+
+ // The language tag (with '_' replaced with '-'), for example "zh-Latn-CN".
+ std::string tag;
+
+ // The base language, for example "zh". Always lowercase.
+ std::string base;
+
+ // True if the language tag explicitly has a Latin script. For example, this
+ // is true for "zh-Latn", but false for "zh". Only the second and third subtag
+ // positions are supported for script.
+ bool has_latin_script;
+};
+
+} // namespace addressinput
+} // namespace i18n
+
+#endif // I18N_ADDRESSINPUT_LANGUAGE_H_
diff --git a/cpp/src/localization.cc b/cpp/src/localization.cc
index 3fd32af..1558b23 100644
--- a/cpp/src/localization.cc
+++ b/cpp/src/localization.cc
@@ -23,7 +23,9 @@ namespace addressinput {
namespace {
-// For each language code XX with translations:
+static const char kDefaultLanguage[] = "en";
+
+// For each language XX with translations:
// (1) Add a namespace XX here with an include of "XX_messages.cc".
// (2) Add a wrapper that converts the char pointer to std::string. (GRIT
// generated functions return char pointers.)
@@ -41,7 +43,8 @@ std::string GetStdString(int message_id) {
} // namespace
-Localization::Localization() : get_string_(&en::GetStdString) {}
+Localization::Localization() : get_string_(&en::GetStdString),
+ language_tag_(kDefaultLanguage) {}
Localization::~Localization() {}
@@ -49,17 +52,20 @@ std::string Localization::GetString(int message_id) const {
return get_string_(message_id);
}
-void Localization::SetLanguage(const std::string& language_code) {
- if (language_code == "en") {
+void Localization::SetLanguage(const std::string& language_tag) {
+ if (language_tag == kDefaultLanguage) {
get_string_ = &en::GetStdString;
} else {
assert(false);
}
+ language_tag_ = language_tag;
}
-void Localization::SetGetter(std::string (*getter)(int)) {
+void Localization::SetGetter(std::string (*getter)(int),
+ const std::string& language_tag) {
assert(getter != NULL);
get_string_ = getter;
+ language_tag_ = language_tag;
}
} // namespace addressinput
diff --git a/cpp/src/post_box_matchers.cc b/cpp/src/post_box_matchers.cc
index fd0602a..95ee375 100644
--- a/cpp/src/post_box_matchers.cc
+++ b/cpp/src/post_box_matchers.cc
@@ -16,14 +16,13 @@
#include "post_box_matchers.h"
-#include <algorithm>
-#include <cctype>
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>
+#include "language.h"
#include "rule.h"
namespace i18n {
@@ -97,15 +96,6 @@ std::map<std::string, const RE2ptr*> InitMatchers() {
return matchers;
}
-std::string GetBaseLanguage(const std::string& language) {
- // Be lenient in parsing, allow underscore separators and uppercase letters.
- std::string::size_type end = language.find_first_of("-_");
- std::string base(
- end == std::string::npos ? language : language.substr(0, end));
- std::transform(base.begin(), base.end(), base.begin(), tolower);
- return base;
-}
-
} // namespace
// static
@@ -118,7 +108,8 @@ std::vector<const RE2ptr*> PostBoxMatchers::GetMatchers(
for (std::vector<std::string>::const_iterator
it = country_rule.GetLanguages().begin();
it != country_rule.GetLanguages().end(); ++it) {
- languages.push_back(GetBaseLanguage(*it));
+ Language language(*it);
+ languages.push_back(language.base);
}
std::vector<const RE2ptr*> result;
diff --git a/cpp/src/rule.cc b/cpp/src/rule.cc
index ebd5be5..48ec3dc 100644
--- a/cpp/src/rule.cc
+++ b/cpp/src/rule.cc
@@ -40,10 +40,11 @@ typedef std::map<std::string, int> NameMessageIdMap;
const char kAdminAreaNameTypeKey[] = "state_name_type";
const char kFormatKey[] = "fmt";
const char kIdKey[] = "id";
+const char kLanguagesKey[] = "languages";
+const char kLatinFormatKey[] = "lfmt";
const char kPostalCodeNameTypeKey[] = "zip_name_type";
const char kRequireKey[] = "require";
const char kSubKeysKey[] = "sub_keys";
-const char kLanguagesKey[] = "languages";
const char kZipKey[] = "zip";
// Used as a separator in a list of items. For example, the list of supported
@@ -108,6 +109,7 @@ int GetMessageIdFromName(const std::string& name,
Rule::Rule()
: id_(),
format_(),
+ latin_format_(),
required_(),
sub_keys_(),
languages_(),
@@ -132,6 +134,7 @@ const Rule& Rule::GetDefault() {
void Rule::CopyFrom(const Rule& rule) {
id_ = rule.id_;
format_ = rule.format_;
+ latin_format_ = rule.latin_format_;
required_ = rule.required_;
sub_keys_ = rule.sub_keys_;
languages_ = rule.languages_;
@@ -158,6 +161,11 @@ bool Rule::ParseSerializedRule(const std::string& serialized_rule) {
ParseAddressFieldsFormat(json.GetStringValueForKey(kFormatKey), &format_);
}
+ if (json.HasStringValueForKey(kLatinFormatKey)) {
+ ParseAddressFieldsFormat(
+ json.GetStringValueForKey(kLatinFormatKey), &latin_format_);
+ }
+
if (json.HasStringValueForKey(kRequireKey)) {
ParseAddressFieldsRequired(
json.GetStringValueForKey(kRequireKey), &required_);
diff --git a/cpp/src/rule.h b/cpp/src/rule.h
index dbba9f1..bd74784 100644
--- a/cpp/src/rule.h
+++ b/cpp/src/rule.h
@@ -58,10 +58,16 @@ class Rule {
// Returns the ID string for this rule.
const std::string& GetId() const { return id_; };
- // Returns the address format for this rule. The format can include the
- // NEWLINE extension for AddressField enum.
+ // Returns the approximate address format with the default order of fields.
+ // The format can include the NEWLINE extension for AddressField enum.
const std::vector<AddressField>& GetFormat() const { return format_; }
+ // Returns the approximate address format with the Latin order of fields. The
+ // format can include the NEWLINE extension for AddressField enum.
+ const std::vector<AddressField>& GetLatinFormat() const {
+ return latin_format_;
+ }
+
// Returns the required fields for this rule.
const std::vector<AddressField>& GetRequired() const { return required_; }
@@ -71,8 +77,8 @@ class Rule {
// "CA", "NY", "TX", etc.
const std::vector<std::string>& GetSubKeys() const { return sub_keys_; }
- // Returns all of the language codes for which this rule has custom rules, for
- // example ["de", "fr", "it"].
+ // Returns all of the language tags supported by this rule, for example ["de",
+ // "fr", "it"].
const std::vector<std::string>& GetLanguages() const { return languages_; }
// Returns a pointer to a RE2 regular expression object created from the
@@ -97,6 +103,7 @@ class Rule {
private:
std::string id_;
std::vector<AddressField> format_;
+ std::vector<AddressField> latin_format_;
std::vector<AddressField> required_;
std::vector<std::string> sub_keys_;
std::vector<std::string> languages_;
diff --git a/cpp/test/address_ui_test.cc b/cpp/test/address_ui_test.cc
index b8cb834..b36ea8c 100644
--- a/cpp/test/address_ui_test.cc
+++ b/cpp/test/address_ui_test.cc
@@ -26,13 +26,16 @@
namespace {
+using i18n::addressinput::ADMIN_AREA;
using i18n::addressinput::AddressField;
using i18n::addressinput::AddressUiComponent;
using i18n::addressinput::BuildComponents;
using i18n::addressinput::COUNTRY;
using i18n::addressinput::GetRegionCodes;
using i18n::addressinput::Localization;
+using i18n::addressinput::POSTAL_CODE;
using i18n::addressinput::RECIPIENT;
+using i18n::addressinput::STREET_ADDRESS;
// Returns testing::AssertionSuccess if the |components| are valid. Uses
// |region_code| in test failure messages.
@@ -66,6 +69,7 @@ testing::AssertionResult ComponentsAreValid(
class AddressUiTest : public testing::TestWithParam<std::string> {
protected:
Localization localization_;
+ std::string best_address_language_tag_;
};
// Verifies that a region code consists of two characters, for example "TW".
@@ -76,14 +80,15 @@ TEST_P(AddressUiTest, RegionCodeHasTwoCharacters) {
// Verifies that BuildComponents() returns valid UI components for a region
// code.
TEST_P(AddressUiTest, ComponentsAreValid) {
- EXPECT_TRUE(ComponentsAreValid(BuildComponents(GetParam(), localization_)));
+ EXPECT_TRUE(ComponentsAreValid(BuildComponents(
+ GetParam(), localization_, &best_address_language_tag_)));
}
// Verifies that BuildComponents() returns at most one input field of each type.
TEST_P(AddressUiTest, UniqueFieldTypes) {
std::set<AddressField> fields;
const std::vector<AddressUiComponent>& components =
- BuildComponents(GetParam(), localization_);
+ BuildComponents(GetParam(), localization_, &best_address_language_tag_);
for (std::vector<AddressUiComponent>::const_iterator it = components.begin();
it != components.end(); ++it) {
EXPECT_TRUE(fields.insert(it->field).second);
@@ -98,7 +103,130 @@ INSTANTIATE_TEST_CASE_P(
// Verifies that BuildComponents() returns an empty vector for an invalid region
// code.
TEST_F(AddressUiTest, InvalidRegionCodeReturnsEmptyVector) {
- EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_).empty());
+ EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_,
+ &best_address_language_tag_).empty());
}
+// Test data for determining the best language tag and whether the right format
+// pattern was used (fmt vs lfmt).
+struct LanguageTestCase {
+ LanguageTestCase(const std::string& region_code,
+ const std::string& ui_language_tag,
+ const std::string& expected_best_address_language_tag,
+ AddressField expected_first_field)
+ : region_code(region_code),
+ ui_language_tag(ui_language_tag),
+ expected_best_address_language_tag(expected_best_address_language_tag),
+ expected_first_field(expected_first_field) {}
+
+ ~LanguageTestCase() {}
+
+ // The CLDR region code to test.
+ const std::string region_code;
+
+ // The BCP 47 language tag to test.
+ const std::string ui_language_tag;
+
+ // The expected value for the best language tag returned by BuildComponents().
+ const std::string expected_best_address_language_tag;
+
+ // The first field expected to be returned from BuildComponents(). Useful for
+ // determining whether the returned format is in Latin or default order.
+ const AddressField expected_first_field;
+};
+
+class BestAddressLanguageTagTest
+ : public testing::TestWithParam<LanguageTestCase> {
+ protected:
+ Localization localization_;
+ std::string best_address_language_tag_;
+};
+
+std::string GetterStub(int) { return std::string(); }
+
+TEST_P(BestAddressLanguageTagTest, CorrectBestAddressLanguageTag) {
+ localization_.SetGetter(&GetterStub, GetParam().ui_language_tag);
+ const std::vector<AddressUiComponent>& components = BuildComponents(
+ GetParam().region_code, localization_, &best_address_language_tag_);
+ EXPECT_EQ(GetParam().expected_best_address_language_tag,
+ best_address_language_tag_);
+ ASSERT_FALSE(components.empty());
+ EXPECT_EQ(GetParam().expected_first_field, components.front().field);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ LanguageTestCases, BestAddressLanguageTagTest,
+ testing::Values(
+ // Armenia supports hy and has a Latin format.
+ LanguageTestCase("AM", "", "hy", RECIPIENT),
+ LanguageTestCase("AM", "hy", "hy", RECIPIENT),
+ LanguageTestCase("AM", "en", "hy-Latn", RECIPIENT),
+
+ // P.R. China supports zh-Hans and has a Latin format.
+ LanguageTestCase("CN", "zh-hans", "zh-Hans", POSTAL_CODE),
+ LanguageTestCase("CN", "zh-hant", "zh-Hans", POSTAL_CODE),
+ LanguageTestCase("CN", "zh-hans-CN", "zh-Hans", POSTAL_CODE),
+ LanguageTestCase("CN", "zh", "zh-Hans", POSTAL_CODE),
+ LanguageTestCase("CN", "ZH_HANS", "zh-Hans", POSTAL_CODE),
+ LanguageTestCase("CN", "zh-cmn-Hans-CN", "zh-Hans", POSTAL_CODE),
+ LanguageTestCase("CN", "zh-Latn", "zh-Latn", RECIPIENT),
+ LanguageTestCase("CN", "zh-latn-CN", "zh-Latn", RECIPIENT),
+ LanguageTestCase("CN", "en", "zh-Latn", RECIPIENT),
+ LanguageTestCase("CN", "ja", "zh-Latn", RECIPIENT),
+ LanguageTestCase("CN", "ko", "zh-Latn", RECIPIENT),
+ LanguageTestCase("CN", "ZH_LATN", "zh-Latn", RECIPIENT),
+ // Libaddressinput does not have information about extended language
+ // subtags, so it uses the zh-Latn language tag for all base languages
+ // that are not zh, even if it's effectively the same language.
+ // Mandarin Chinese, Simplified script, as used in China:
+ LanguageTestCase("CN", "cmn-Hans-CN", "zh-Latn", RECIPIENT),
+
+ // Hong Kong supports zh-Hant and en. It has a Latin format.
+ LanguageTestCase("HK", "zh", "zh-Hant", ADMIN_AREA),
+ LanguageTestCase("HK", "zh-hans", "zh-Hant", ADMIN_AREA),
+ LanguageTestCase("HK", "zh-hant", "zh-Hant", ADMIN_AREA),
+ LanguageTestCase("HK", "zh-yue-HK", "zh-Hant", ADMIN_AREA),
+ LanguageTestCase("HK", "en", "en", ADMIN_AREA),
+ LanguageTestCase("HK", "zh-latn", "zh-Latn", RECIPIENT),
+ LanguageTestCase("HK", "fr", "zh-Latn", RECIPIENT),
+ LanguageTestCase("HK", "ja", "zh-Latn", RECIPIENT),
+ LanguageTestCase("HK", "ko", "zh-Latn", RECIPIENT),
+ // Libaddressinput does not have information about extended language
+ // subtags, so it uses the zh-Latn language tag for all base languages
+ // that are not zh or en, even if it's effectively the same language.
+ // Cantonese Chinese, as used in Hong Kong:
+ LanguageTestCase("HK", "yue-HK", "zh-Latn", RECIPIENT),
+
+ // Macao supports zh-Hant and pt. It has a Latin format.
+ LanguageTestCase("MO", "zh", "zh-Hant", STREET_ADDRESS),
+ LanguageTestCase("MO", "zh-Hant", "zh-Hant", STREET_ADDRESS),
+ LanguageTestCase("MO", "pt", "pt", STREET_ADDRESS),
+ LanguageTestCase("MO", "zh-Latn", "zh-Latn", RECIPIENT),
+ LanguageTestCase("MO", "en", "zh-Latn", RECIPIENT),
+
+ // Switzerland supports de, fr, and it.
+ LanguageTestCase("CH", "de", "de", RECIPIENT),
+ LanguageTestCase("CH", "de-DE", "de", RECIPIENT),
+ LanguageTestCase("CH", "de-Latn-DE", "de", RECIPIENT),
+ LanguageTestCase("CH", "fr", "fr", RECIPIENT),
+ LanguageTestCase("CH", "it", "it", RECIPIENT),
+ LanguageTestCase("CH", "en", "de", RECIPIENT),
+
+ // Antarctica does not have language information.
+ LanguageTestCase("AQ", "en", "en", RECIPIENT),
+ LanguageTestCase("AQ", "fr", "fr", RECIPIENT),
+ LanguageTestCase("AQ", "es", "es", RECIPIENT),
+ LanguageTestCase("AQ", "zh-Hans", "zh-Hans", RECIPIENT),
+
+ // Egypt supports ar and has a Latin format.
+ LanguageTestCase("EG", "ar", "ar", RECIPIENT),
+ LanguageTestCase("EG", "ar-Arab", "ar", RECIPIENT),
+ LanguageTestCase("EG", "ar-Latn", "ar-Latn", RECIPIENT),
+ LanguageTestCase("EG", "fr", "ar-Latn", RECIPIENT),
+ LanguageTestCase("EG", "fa", "ar-Latn", RECIPIENT),
+ // Libaddressinput does not have language-to-script mapping, so it uses
+ // the ar-Latn language tag for all base languages that are not ar, even
+ // if the script is the same.
+ LanguageTestCase("EG", "fa-Arab", "ar-Latn", RECIPIENT)));
+
} // namespace
diff --git a/cpp/test/language_test.cc b/cpp/test/language_test.cc
new file mode 100644
index 0000000..197459e
--- /dev/null
+++ b/cpp/test/language_test.cc
@@ -0,0 +1,62 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "language.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+namespace {
+
+using i18n::addressinput::Language;
+
+struct LanguageTestCase {
+ LanguageTestCase(const std::string& input_language_tag,
+ const std::string& expected_language_tag,
+ const std::string& expected_base_language,
+ bool expected_has_latin_script)
+ : input_language_tag(input_language_tag),
+ expected_language_tag(expected_language_tag),
+ expected_base_language(expected_base_language),
+ expected_has_latin_script(expected_has_latin_script) {}
+
+ ~LanguageTestCase() {}
+
+ const std::string input_language_tag;
+ const std::string expected_language_tag;
+ const std::string expected_base_language;
+ const bool expected_has_latin_script;
+};
+
+class LanguageTest : public testing::TestWithParam<LanguageTestCase> {};
+
+TEST_P(LanguageTest, ExtractedDataIsCorrect) {
+ Language language(GetParam().input_language_tag);
+ EXPECT_EQ(GetParam().expected_language_tag, language.tag);
+ EXPECT_EQ(GetParam().expected_base_language, language.base);
+ EXPECT_EQ(GetParam().expected_has_latin_script, language.has_latin_script);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ LanguageTestCases, LanguageTest,
+ testing::Values(
+ LanguageTestCase("", "", "", false),
+ LanguageTestCase("en", "en", "en", false),
+ LanguageTestCase("zh-Latn-CN", "zh-Latn-CN", "zh", true),
+ LanguageTestCase("zh-cmn-Latn-CN", "zh-cmn-Latn-CN", "zh", true),
+ LanguageTestCase("zh-Hans", "zh-Hans", "zh", false),
+ LanguageTestCase("en_GB", "en-GB", "en", false)));
+
+} // namespace
diff --git a/cpp/test/localization_test.cc b/cpp/test/localization_test.cc
index 6c8b3be..334068f 100644
--- a/cpp/test/localization_test.cc
+++ b/cpp/test/localization_test.cc
@@ -33,11 +33,13 @@ class LocalizationTest : public testing::TestWithParam<int> {
};
// Verifies that a custom message getter can be used.
-const char kValidMessage[] = "Data";
+static const char kValidMessage[] = "Data";
+static const char kValidLanguageTag[] = "tlh";
std::string GetValidMessage(int message_id) { return kValidMessage; }
TEST_P(LocalizationTest, ValidStringGetterCanBeUsed) {
- localization_.SetGetter(&GetValidMessage);
+ localization_.SetGetter(&GetValidMessage, kValidLanguageTag);
EXPECT_EQ(kValidMessage, localization_.GetString(GetParam()));
+ EXPECT_EQ(kValidLanguageTag, localization_.GetLanguage());
}
// Verifies that the default language for messages does not have empty strings.
@@ -45,8 +47,8 @@ TEST_P(LocalizationTest, DefaultStringIsNotEmpty) {
EXPECT_FALSE(localization_.GetString(GetParam()).empty());
}
-// Verifies that English is the default language.
-TEST_P(LocalizationTest, EnglishIsDefaultLanguage) {
+// Verifies that the default string is English.
+TEST_P(LocalizationTest, DefaultStringIsEnglish) {
std::string default_string = localization_.GetString(GetParam());
localization_.SetLanguage("en");
EXPECT_EQ(default_string, localization_.GetString(GetParam()));
@@ -80,4 +82,9 @@ TEST_F(LocalizationTest, InvalidMessageIsEmptyString) {
EXPECT_TRUE(localization_.GetString(INVALID_MESSAGE_ID).empty());
}
+// Verifies that the default language is English.
+TEST_F(LocalizationTest, DefaultLanguageIsEnglish) {
+ EXPECT_EQ("en", localization_.GetLanguage());
+}
+
} // namespace
diff --git a/cpp/test/rule_test.cc b/cpp/test/rule_test.cc
index b9e671a..ca54006 100644
--- a/cpp/test/rule_test.cc
+++ b/cpp/test/rule_test.cc
@@ -43,6 +43,7 @@ TEST(RuleTest, CopyOverwritesRule) {
Rule rule;
ASSERT_TRUE(rule.ParseSerializedRule("{"
"\"fmt\":\"%S%Z\","
+ "\"lfmt\":\"%Z%S\","
"\"id\":\"data/XA\","
"\"lname\":\"Testistan\","
"\"require\":\"AC\","
@@ -55,6 +56,7 @@ TEST(RuleTest, CopyOverwritesRule) {
Rule copy;
EXPECT_NE(rule.GetFormat(), copy.GetFormat());
+ EXPECT_NE(rule.GetLatinFormat(), copy.GetLatinFormat());
EXPECT_NE(rule.GetId(), copy.GetId());
EXPECT_NE(rule.GetRequired(), copy.GetRequired());
EXPECT_NE(rule.GetSubKeys(), copy.GetSubKeys());
@@ -69,6 +71,7 @@ TEST(RuleTest, CopyOverwritesRule) {
copy.CopyFrom(rule);
EXPECT_EQ(rule.GetFormat(), copy.GetFormat());
+ EXPECT_EQ(rule.GetLatinFormat(), copy.GetLatinFormat());
EXPECT_EQ(rule.GetId(), copy.GetId());
EXPECT_EQ(rule.GetRequired(), copy.GetRequired());
EXPECT_EQ(rule.GetSubKeys(), copy.GetSubKeys());
@@ -115,6 +118,15 @@ TEST(RuleTest, ParsesFormatCorrectly) {
EXPECT_EQ(expected, rule.GetFormat());
}
+TEST(RuleTest, ParsesLatinFormatCorrectly) {
+ std::vector<AddressField> expected;
+ expected.push_back(LOCALITY);
+ expected.push_back(ADMIN_AREA);
+ Rule rule;
+ ASSERT_TRUE(rule.ParseSerializedRule("{\"lfmt\":\"%C%S\"}"));
+ EXPECT_EQ(expected, rule.GetLatinFormat());
+}
+
TEST(RuleTest, ParsesRequiredCorrectly) {
std::vector<AddressField> expected;
expected.push_back(STREET_ADDRESS);