aboutsummaryrefslogtreecommitdiff
path: root/cpp/src/language.cc
diff options
context:
space:
mode:
Diffstat (limited to 'cpp/src/language.cc')
-rw-r--r--cpp/src/language.cc102
1 files changed, 102 insertions, 0 deletions
diff --git a/cpp/src/language.cc b/cpp/src/language.cc
new file mode 100644
index 0000000..9e456f0
--- /dev/null
+++ b/cpp/src/language.cc
@@ -0,0 +1,102 @@
+// Copyright (C) 2014 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "language.h"
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+#include <vector>
+
+#include "rule.h"
+#include "util/string_split.h"
+
+namespace i18n {
+namespace addressinput {
+
+namespace {
+
+// Lowercases a single ASCII letter and returns every other character
+// unchanged. Used instead of 'tolower' because 'tolower' consults the current
+// C locale and has undefined behavior when handed a negative 'char' value,
+// which a malformed (non-ASCII) tag could contain.
+char ToLowerAscii(char c) {
+  return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
+}
+
+}  // namespace
+
+// Builds a Language from |language_tag|:
+//  - |tag| keeps the original casing, with every '_' replaced by '-'.
+//  - |base| is the lowercased text before the first '-' (the whole lowercased
+//    tag when there is no '-').
+//  - |has_latin_script| is true when the second or third subtag is "latn",
+//    compared case-insensitively.
+Language::Language(const std::string& language_tag)
+    : tag(language_tag), base(), has_latin_script(false) {
+  // Character '-' is the subtag separator in BCP 47. However, some legacy code
+  // generates tags with '_' instead of '-', so normalize those first.
+  static const char kSubtagsSeparator = '-';
+  static const char kAlternativeSubtagsSeparator = '_';
+  std::replace(tag.begin(), tag.end(), kAlternativeSubtagsSeparator,
+               kSubtagsSeparator);
+
+  // BCP 47 tags are ASCII, so ASCII-only lowercasing is sufficient for
+  // well-formed input and leaves any unexpected bytes untouched.
+  std::string lowercase = tag;
+  std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(),
+                 ToLowerAscii);
+
+  // find() yields npos when there is no separator, in which case substr()
+  // returns the entire string.
+  base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));
+
+  // The lowercase BCP 47 subtag for Latin script.
+  static const char kLowercaseLatinScript[] = "latn";
+  std::vector<std::string> subtags;
+  SplitString(lowercase, kSubtagsSeparator, &subtags);
+
+  // Support only the second and third position for the script.
+  has_latin_script =
+      (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
+      (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
+}
+
+// The destructor body is intentionally empty.
+Language::~Language() {
+}
+
+// Selects the language in which to present an address for the region
+// described by |address_region_rule|, given the user's |ui_language|. The
+// first matching case wins:
+//   1. The rule lists no languages: |ui_language| is returned as is.
+//   2. |ui_language| has an empty tag: the rule's first language.
+//   3. The rule has a Latin format and |ui_language| is in Latin script: the
+//      base of the rule's first language with "-Latn" appended.
+//   4. A rule language shares its base with |ui_language|: the first such
+//      language, in the rule's order.
+//   5. Otherwise: the "-Latn" language from case 3 when the rule has a Latin
+//      format, else the rule's first language.
+Language ChooseBestAddressLanguage(const Rule& address_region_rule,
+                                   const Language& ui_language) {
+  typedef std::vector<std::string>::size_type TagIndex;
+  typedef std::vector<Language>::size_type LanguageIndex;
+
+  const std::vector<std::string>& region_tags =
+      address_region_rule.GetLanguages();
+  if (region_tags.empty()) {
+    return ui_language;
+  }
+
+  // Parse every tag the region supports, preserving the rule's order.
+  std::vector<Language> candidates;
+  for (TagIndex i = 0; i < region_tags.size(); ++i) {
+    candidates.push_back(Language(region_tags[i]));
+  }
+  const Language& region_default = candidates.front();
+
+  if (ui_language.tag.empty()) {
+    return region_default;
+  }
+
+  const bool region_has_latin_format =
+      !address_region_rule.GetLatinFormat().empty();
+
+  // The conventionally formatted BCP 47 Latin script with a preceding subtag
+  // separator.
+  static const char kLatinScriptSuffix[] = "-Latn";
+  const Language latin_region_default(region_default.base +
+                                      kLatinScriptSuffix);
+
+  if (region_has_latin_format && ui_language.has_latin_script) {
+    return latin_region_default;
+  }
+
+  // Base language comparison works because no region supports the same base
+  // language with different scripts, for now. For example, no region supports
+  // "zh-Hant" and "zh-Hans" at the same time.
+  for (LanguageIndex i = 0; i < candidates.size(); ++i) {
+    if (candidates[i].base == ui_language.base) {
+      return candidates[i];
+    }
+  }
+
+  // No base language matched: fall back to the region's defaults.
+  if (region_has_latin_format) {
+    return latin_region_default;
+  }
+  return region_default;
+}
+
+} // namespace addressinput
+} // namespace i18n