aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTorne (Richard Coles) <torne@google.com>2014-06-25 10:31:36 +0100
committerTorne (Richard Coles) <torne@google.com>2014-06-25 10:31:36 +0100
commit6ce3a9ad00160cd58574b6bca6d2220c4dbfc83e (patch)
treebd4c283a39b6659d25c49870a045ff73b049eebc
parentb8347ad8ead685b8afe0ff329ae047f17c7b817c (diff)
parentf7ddeee545f03c948074c921c4648807d90227ae (diff)
downloadsrc-l-preview.tar.gz
This commit was generated by merge_to_master.py. Change-Id: I283eef90c15d40ea8cd6290f12094fc152a2c45e
-rw-r--r--cpp/include/libaddressinput/address_ui.h3
-rw-r--r--cpp/include/libaddressinput/localization.h27
-rw-r--r--cpp/include/libaddressinput/preload_supplier.h6
-rw-r--r--cpp/src/address_ui.cc5
-rw-r--r--cpp/src/localization.cc36
-rw-r--r--cpp/src/preload_supplier.cc145
-rw-r--r--cpp/src/region_data_constants.cc7
-rw-r--r--cpp/src/util/lru_cache_using_std.h168
-rw-r--r--cpp/src/util/string_compare.cc52
-rw-r--r--cpp/src/util/string_compare.h6
-rw-r--r--cpp/test/address_ui_test.cc17
-rw-r--r--cpp/test/address_validator_test.cc83
-rw-r--r--cpp/test/localization_test.cc40
-rw-r--r--cpp/test/util/string_compare_test.cc33
-rw-r--r--cpp/test/validation_task_test.cc6
15 files changed, 513 insertions, 121 deletions
diff --git a/cpp/include/libaddressinput/address_ui.h b/cpp/include/libaddressinput/address_ui.h
index 8939501..cc39f6a 100644
--- a/cpp/include/libaddressinput/address_ui.h
+++ b/cpp/include/libaddressinput/address_ui.h
@@ -29,7 +29,7 @@ const std::vector<std::string>& GetRegionCodes();
// Returns the UI components for the CLDR |region_code|. Uses the strings from
// |localization|. The components can be in default or Latin order, depending on
-// the language of |localization|.
+// the BCP 47 |ui_language_tag|.
//
// Sets the |best_address_language_tag| to the BCP 47 language tag that should
// be saved with this address. This language will be used to get drop-downs to
@@ -40,6 +40,7 @@ const std::vector<std::string>& GetRegionCodes();
std::vector<AddressUiComponent> BuildComponents(
const std::string& region_code,
const Localization& localization,
+ const std::string& ui_language_tag,
std::string* best_address_language_tag);
} // namespace addressinput
diff --git a/cpp/include/libaddressinput/localization.h b/cpp/include/libaddressinput/localization.h
index 08b5964..acfdf7e 100644
--- a/cpp/include/libaddressinput/localization.h
+++ b/cpp/include/libaddressinput/localization.h
@@ -26,17 +26,17 @@ namespace addressinput {
struct AddressData;
-// The object to retrieve localized strings based on message IDs. Sample usage:
+// The object to retrieve localized strings based on message IDs. It returns
+// English by default. Sample usage:
// Localization localization;
-// localization.SetLanguage("en");
// std::string best_language_tag;
-// Process(BuildComponents("CA", localization, &best_language_tag));
+// Process(BuildComponents("CA", localization, "en-US", &best_language_tag));
//
// Alternative usage:
// Localization localization;
-// localization.SetGetter(&MyStringGetter, "fr");
+// localization.SetGetter(&MyStringGetter);
// std::string best_language_tag;
-// Process(BuildComponents("CA", localization, &best_language_tag));
+// Process(BuildComponents("CA", localization, "fr-CA", &best_language_tag));
class Localization {
public:
// Initializes with English messages by default.
@@ -63,17 +63,11 @@ class Localization {
bool enable_examples,
bool enable_links) const;
- // Sets the language for the strings. The only supported language is "en"
- // until we have translations.
- void SetLanguage(const std::string& language_tag);
-
// Sets the string getter that takes a message identifier and returns the
- // corresponding localized string. The |language_tag| parameter is used only
- // for information purposes here.
- void SetGetter(std::string (*getter)(int), const std::string& language_tag);
-
- // Returns the current language tag.
- const std::string& GetLanguage() const { return language_tag_; }
+ // corresponding localized string. For example, in Chromium there is
+ // l10n_util::GetStringUTF8 which always returns strings in the current
+ // application locale.
+ void SetGetter(std::string (*getter)(int));
private:
// Returns the error message where the address field is a postal code. Helper
@@ -90,9 +84,6 @@ class Localization {
// The string getter.
std::string (*get_string_)(int);
-
- // The current language tag.
- std::string language_tag_;
};
} // namespace addressinput
diff --git a/cpp/include/libaddressinput/preload_supplier.h b/cpp/include/libaddressinput/preload_supplier.h
index f7654ab..740b4d0 100644
--- a/cpp/include/libaddressinput/preload_supplier.h
+++ b/cpp/include/libaddressinput/preload_supplier.h
@@ -20,14 +20,15 @@
#include <libaddressinput/util/basictypes.h>
#include <libaddressinput/util/scoped_ptr.h>
-#include <map>
#include <set>
#include <string>
+#include <vector>
namespace i18n {
namespace addressinput {
class Downloader;
+class IndexMap;
class LookupKey;
class Retriever;
class Rule;
@@ -90,7 +91,8 @@ class PreloadSupplier : public Supplier {
const scoped_ptr<const Retriever> retriever_;
std::set<std::string> pending_;
- std::map<std::string, const Rule*> rule_cache_;
+ const scoped_ptr<IndexMap> rule_index_;
+ std::vector<const Rule*> rule_storage_;
DISALLOW_COPY_AND_ASSIGN(PreloadSupplier);
};
diff --git a/cpp/src/address_ui.cc b/cpp/src/address_ui.cc
index d515765..a099335 100644
--- a/cpp/src/address_ui.cc
+++ b/cpp/src/address_ui.cc
@@ -82,6 +82,7 @@ const std::vector<std::string>& GetRegionCodes() {
std::vector<AddressUiComponent> BuildComponents(
const std::string& region_code,
const Localization& localization,
+ const std::string& ui_language_tag,
std::string* best_address_language_tag) {
assert(best_address_language_tag != NULL);
std::vector<AddressUiComponent> result;
@@ -93,8 +94,8 @@ std::vector<AddressUiComponent> BuildComponents(
return result;
}
- const Language& best_address_language = ChooseBestAddressLanguage(
- rule, Language(localization.GetLanguage()));
+ const Language& best_address_language =
+ ChooseBestAddressLanguage(rule, Language(ui_language_tag));
*best_address_language_tag = best_address_language.tag;
const std::vector<FormatElement>& format =
diff --git a/cpp/src/localization.cc b/cpp/src/localization.cc
index 951f1a7..9bb32a5 100644
--- a/cpp/src/localization.cc
+++ b/cpp/src/localization.cc
@@ -25,6 +25,7 @@
#include "grit.h"
#include "region_data_constants.h"
#include "rule.h"
+#include "util/string_split.h"
#include "util/string_util.h"
namespace {
@@ -42,28 +43,16 @@ namespace addressinput {
namespace {
-static const char kDefaultLanguage[] = "en";
-
-// For each language XX with translations:
-// (1) Add a namespace XX here with an include of "XX_messages.cc".
-// (2) Add a wrapper that converts the char pointer to std::string. (GRIT
-// generated functions return char pointers.)
-// (2) Use the XX::GetStdString in the SetLanguage() method below.
-namespace en {
-
#include "en_messages.cc"
-std::string GetStdString(int message_id) {
+std::string GetEnglishString(int message_id) {
const char* str = GetString(message_id);
return str != NULL ? std::string(str) : std::string();
}
-} // namespace en
-
} // namespace
-Localization::Localization() : get_string_(&en::GetStdString),
- language_tag_(kDefaultLanguage) {}
+Localization::Localization() : get_string_(&GetEnglishString) {}
Localization::~Localization() {}
@@ -83,7 +72,11 @@ std::string Localization::GetErrorMessage(const AddressData& address,
if (rule.ParseSerializedRule(
RegionDataConstants::GetRegionData(address.region_code))) {
if (enable_examples) {
- postal_code_example = rule.GetPostalCodeExample();
+ std::vector<std::string> examples_list;
+ SplitString(rule.GetPostalCodeExample(), ',', &examples_list);
+ if (!examples_list.empty()) {
+ postal_code_example = examples_list.front();
+ }
}
if (enable_links) {
post_service_url = rule.GetPostServiceUrl();
@@ -124,20 +117,9 @@ std::string Localization::GetErrorMessage(const AddressData& address,
}
}
-void Localization::SetLanguage(const std::string& language_tag) {
- if (language_tag == kDefaultLanguage) {
- get_string_ = &en::GetStdString;
- } else {
- assert(false);
- }
- language_tag_ = language_tag;
-}
-
-void Localization::SetGetter(std::string (*getter)(int),
- const std::string& language_tag) {
+void Localization::SetGetter(std::string (*getter)(int)) {
assert(getter != NULL);
get_string_ = getter;
- language_tag_ = language_tag;
}
std::string Localization::GetErrorMessageForPostalCode(
diff --git a/cpp/src/preload_supplier.cc b/cpp/src/preload_supplier.cc
index 5219e9a..31325bd 100644
--- a/cpp/src/preload_supplier.cc
+++ b/cpp/src/preload_supplier.cc
@@ -21,10 +21,13 @@
#include <libaddressinput/util/basictypes.h>
#include <libaddressinput/util/scoped_ptr.h>
+#include <algorithm>
#include <cassert>
#include <cstddef>
+#include <functional>
#include <map>
#include <set>
+#include <stack>
#include <string>
#include <utility>
#include <vector>
@@ -35,12 +38,36 @@
#include "retriever.h"
#include "rule.h"
#include "util/json.h"
+#include "util/string_compare.h"
namespace i18n {
namespace addressinput {
namespace {
+// STL predicate less<> that uses StringCompare to match strings that a human
+// reader would consider to be "the same". The default implementation just does
+// case insensitive string comparison, but StringCompare can be overridden with
+// more sophisticated implementations.
+class IndexLess : public std::binary_function<std::string, std::string, bool> {
+ public:
+ result_type operator()(const first_argument_type& a,
+ const second_argument_type& b) const {
+ return kStringCompare.NaturalLess(a, b);
+ }
+
+ private:
+ static const StringCompare kStringCompare;
+};
+
+const StringCompare IndexLess::kStringCompare;
+
+} // namespace
+
+class IndexMap : public std::map<std::string, const Rule*, IndexLess> {};
+
+namespace {
+
class Helper {
public:
// Does not take ownership of its parameters.
@@ -49,14 +76,17 @@ class Helper {
const PreloadSupplier::Callback& loaded,
const Retriever& retriever,
std::set<std::string>* pending,
- std::map<std::string, const Rule*>* rule_cache)
+ IndexMap* rule_index,
+ std::vector<const Rule*>* rule_storage)
: region_code_(region_code),
loaded_(loaded),
pending_(pending),
- rule_cache_(rule_cache),
+ rule_index_(rule_index),
+ rule_storage_(rule_storage),
retrieved_(BuildCallback(this, &Helper::OnRetrieved)) {
assert(pending_ != NULL);
- assert(rule_cache_ != NULL);
+ assert(rule_index_ != NULL);
+ assert(rule_storage_ != NULL);
assert(retrieved_ != NULL);
pending_->insert(key);
retriever.Retrieve(key, *retrieved_);
@@ -75,6 +105,7 @@ class Helper {
(void)status; // Prevent unused variable if assert() is optimized away.
Json json;
+ std::vector<const Rule*> sub_rules;
if (!success) {
goto callback;
@@ -112,13 +143,95 @@ class Helper {
rule->ParseJsonRule(value);
assert(id == rule->GetId()); // Sanity check.
- std::pair<std::map<std::string, const Rule*>::iterator, bool> result =
- rule_cache_->insert(std::make_pair(rule->GetId(), rule));
+ rule_storage_->push_back(rule);
+ if (depth > 0) {
+ sub_rules.push_back(rule);
+ }
+
+ // Add the ID of this Rule object to the rule index.
+ std::pair<IndexMap::iterator, bool> result =
+ rule_index_->insert(std::make_pair(id, rule));
assert(result.second);
(void)result; // Prevent unused variable if assert() is optimized away.
+
++rule_count;
}
+ /*
+ * Normally the address metadata server takes care of mapping from natural
+ * language names to metadata IDs (eg. "São Paulo" -> "SP") and from Latin
+ * script names to local script names (eg. "Tokushima" -> "徳島県").
+ *
+ * As the PreloadSupplier doesn't contact the metadata server upon each
+ * Supply() request, it instead has an internal lookup table (rule_index_)
+ * that contains such mappings.
+ *
+ * This lookup table is populated by iterating over all sub rules and, for
+ * each of them, constructing ID strings using human readable names (eg.
+ * "São Paulo") and using Latin script names (eg. "Tokushima").
+ */
+ for (std::vector<const Rule*>::const_iterator
+ it = sub_rules.begin(); it != sub_rules.end(); ++it) {
+ std::stack<const Rule*> hierarchy;
+ hierarchy.push(*it);
+
+ // Push pointers to all parent Rule objects onto the hierarchy stack.
+ for (std::string parent_id((*it)->GetId());;) {
+ // Strip the last part of parent_id. Break if COUNTRY level is reached.
+ std::string::size_type pos = parent_id.rfind('/');
+ if (pos == sizeof "data/ZZ" - 1) {
+ break;
+ }
+ parent_id.resize(pos);
+
+ IndexMap::const_iterator jt = rule_index_->find(parent_id);
+ assert(jt != rule_index_->end());
+ hierarchy.push(jt->second);
+ }
+
+ std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1));
+ std::string latin_id(human_id);
+
+ // Append the names from all Rule objects on the hierarchy stack.
+ for (; !hierarchy.empty(); hierarchy.pop()) {
+ const Rule* rule = hierarchy.top();
+
+ human_id.push_back('/');
+ if (!rule->GetName().empty()) {
+ human_id.append(rule->GetName());
+ } else {
+ // If the "name" field is empty, the name is the last part of the ID.
+ const std::string& id = rule->GetId();
+ std::string::size_type pos = id.rfind('/');
+ assert(pos != std::string::npos);
+ human_id.append(id.substr(pos + 1));
+ }
+
+ if (!rule->GetLatinName().empty()) {
+ latin_id.push_back('/');
+ latin_id.append(rule->GetLatinName());
+ }
+ }
+
+ // If the ID has a language tag, copy it.
+ {
+ const std::string& id = (*it)->GetId();
+ std::string::size_type pos = id.rfind("--");
+ if (pos != std::string::npos) {
+ human_id.append(id, pos, id.size() - pos);
+ }
+ }
+
+ rule_index_->insert(std::make_pair(human_id, *it));
+
+ // Add the Latin script ID, if a Latin script name could be found for
+ // every part of the ID.
+ if (std::count(human_id.begin(), human_id.end(), '/') ==
+ std::count(latin_id.begin(), latin_id.end(), '/')) {
+ rule_index_->insert(std::make_pair(latin_id, *it));
+ }
+ }
+
callback:
loaded_(success, region_code_, rule_count);
delete this;
@@ -127,7 +240,8 @@ class Helper {
const std::string region_code_;
const PreloadSupplier::Callback& loaded_;
std::set<std::string>* const pending_;
- std::map<std::string, const Rule*>* const rule_cache_;
+ IndexMap* const rule_index_;
+ std::vector<const Rule*>* const rule_storage_;
const scoped_ptr<const Retriever::Callback> retrieved_;
DISALLOW_COPY_AND_ASSIGN(Helper);
@@ -148,12 +262,13 @@ PreloadSupplier::PreloadSupplier(const std::string& validation_data_url,
Storage* storage)
: retriever_(new Retriever(validation_data_url, downloader, storage)),
pending_(),
- rule_cache_() {}
+ rule_index_(new IndexMap),
+ rule_storage_() {}
PreloadSupplier::~PreloadSupplier() {
- for (std::map<std::string, const Rule*>::const_iterator
- it = rule_cache_.begin(); it != rule_cache_.end(); ++it) {
- delete it->second;
+ for (std::vector<const Rule*>::const_iterator
+ it = rule_storage_.begin(); it != rule_storage_.end(); ++it) {
+ delete *it;
}
}
@@ -192,7 +307,8 @@ void PreloadSupplier::LoadRules(const std::string& region_code,
loaded,
*retriever_,
&pending_,
- &rule_cache_);
+ rule_index_.get(),
+ &rule_storage_);
}
bool PreloadSupplier::IsLoaded(const std::string& region_code) const {
@@ -214,9 +330,8 @@ bool PreloadSupplier::GetRuleHierarchy(const LookupKey& lookup_key,
for (size_t depth = 0; depth <= max_depth; ++depth) {
const std::string& key = lookup_key.ToKeyString(depth);
- std::map<std::string, const Rule*>::const_iterator it =
- rule_cache_.find(key);
- if (it == rule_cache_.end()) {
+ IndexMap::const_iterator it = rule_index_->find(key);
+ if (it == rule_index_->end()) {
return depth > 0; // No data on COUNTRY level is failure.
}
hierarchy->rule[depth] = it->second;
@@ -227,7 +342,7 @@ bool PreloadSupplier::GetRuleHierarchy(const LookupKey& lookup_key,
}
bool PreloadSupplier::IsLoadedKey(const std::string& key) const {
- return rule_cache_.find(key) != rule_cache_.end();
+ return rule_index_->find(key) != rule_index_->end();
}
bool PreloadSupplier::IsPendingKey(const std::string& key) const {
diff --git a/cpp/src/region_data_constants.cc b/cpp/src/region_data_constants.cc
index f3d9c3c..cabee28 100644
--- a/cpp/src/region_data_constants.cc
+++ b/cpp/src/region_data_constants.cc
@@ -12,10 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-// The data in this file will be automatically generated. For now, the data
-// comes from:
-//
-// https://code.google.com/p/libaddressinput/source/browse/trunk/java/src/com/android/i18n/addressinput/RegionDataConstants.java?r=137
+// The data in this file is automatically generated.
#include "region_data_constants.h"
@@ -1263,7 +1260,7 @@ std::map<std::string, std::string> InitRegionData() {
"\"languages\":\"en\""
"}"));
region_data.insert(std::make_pair("US", "{"
- "\"fmt\":\"%N%n%O%n%A%n%C %S %Z\","
+ "\"fmt\":\"%N%n%O%n%A%n%C, %S %Z\","
"\"require\":\"ACSZ\","
"\"zip_name_type\":\"zip\","
"\"state_name_type\":\"state\","
diff --git a/cpp/src/util/lru_cache_using_std.h b/cpp/src/util/lru_cache_using_std.h
new file mode 100644
index 0000000..25aced7
--- /dev/null
+++ b/cpp/src/util/lru_cache_using_std.h
@@ -0,0 +1,168 @@
+/******************************************************************************/
+/* Copyright (c) 2010-2011, Tim Day <timday@timday.com> */
+/* */
+/* Permission to use, copy, modify, and/or distribute this software for any */
+/* purpose with or without fee is hereby granted, provided that the above */
+/* copyright notice and this permission notice appear in all copies. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES */
+/* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR */
+/* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
+/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN */
+/* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF */
+/* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+/******************************************************************************/
+
+// The original source code is from:
+// https://bitbucket.org/timday/lru_cache/src/497822a492a8/include/lru_cache_using_std.h
+
+#ifndef I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_
+#define I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_
+
+#include <cassert>
+#include <list>
+#include <map>
+
+// Class providing fixed-size (by number of records)
+// LRU-replacement cache of a function with signature
+// V f(K).
+// The default comparator/hash/allocator will be used.
+template <
+ typename K,
+ typename V
+ > class lru_cache_using_std
+{
+public:
+
+ typedef K key_type;
+ typedef V value_type;
+
+ // Key access history, most recent at back
+ typedef std::list<key_type> key_tracker_type;
+
+ // Key to value and key history iterator
+ typedef std::map<
+ key_type,
+ std::pair<
+ value_type,
+ typename key_tracker_type::iterator
+ >
+ > key_to_value_type;
+
+ // Constructor specifies the cached function and
+ // the maximum number of records to be stored
+ lru_cache_using_std(
+ value_type (*f)(const key_type&),
+ size_t c
+ )
+ :_fn(f)
+ ,_capacity(c)
+ {
+ assert(_capacity!=0);
+ }
+
+ // Obtain value of the cached function for k
+ value_type operator()(const key_type& k) {
+
+ // Attempt to find existing record
+ const typename key_to_value_type::iterator it
+ =_key_to_value.find(k);
+
+ if (it==_key_to_value.end()) {
+
+ // We don't have it:
+
+ // Evaluate function and create new record
+ const value_type v=_fn(k);
+ insert(k,v);
+
+ // Return the freshly computed value
+ return v;
+
+ } else {
+
+ // We do have it:
+
+ // Update access record by moving
+ // accessed key to back of list
+ _key_tracker.splice(
+ _key_tracker.end(),
+ _key_tracker,
+ (*it).second.second
+ );
+
+ // Return the retrieved value
+ return (*it).second.first;
+ }
+ }
+
+ // Obtain the cached keys, most recently used element
+ // at head, least recently used at tail.
+ // This method is provided purely to support testing.
+ template <typename IT> void get_keys(IT dst) const {
+ typename key_tracker_type::const_reverse_iterator src
+ =_key_tracker.rbegin();
+ while (src!=_key_tracker.rend()) {
+ *dst++ = *src++;
+ }
+ }
+
+private:
+
+ // Record a fresh key-value pair in the cache
+ void insert(const key_type& k,const value_type& v) {
+
+ // Method is only called on cache misses
+ assert(_key_to_value.find(k)==_key_to_value.end());
+
+ // Make space if necessary
+ if (_key_to_value.size()==_capacity)
+ evict();
+
+ // Record k as most-recently-used key
+ typename key_tracker_type::iterator it
+ =_key_tracker.insert(_key_tracker.end(),k);
+
+ // Create the key-value entry,
+ // linked to the usage record.
+ _key_to_value.insert(
+ std::make_pair(
+ k,
+ std::make_pair(v,it)
+ )
+ );
+ // No need to check return,
+ // given previous assert.
+ }
+
+ // Purge the least-recently-used element in the cache
+ void evict() {
+
+ // Assert method is never called when cache is empty
+ assert(!_key_tracker.empty());
+
+ // Identify least recently used key
+ const typename key_to_value_type::iterator it
+ =_key_to_value.find(_key_tracker.front());
+ assert(it!=_key_to_value.end());
+
+ // Erase both elements to completely purge record
+ _key_to_value.erase(it);
+ _key_tracker.pop_front();
+ }
+
+ // The function to be cached
+ value_type (*_fn)(const key_type&);
+
+ // Maximum number of key-value pairs to be retained
+ const size_t _capacity;
+
+ // Key access history
+ key_tracker_type _key_tracker;
+
+ // Key-to-value lookup
+ key_to_value_type _key_to_value;
+};
+
+#endif // I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_
diff --git a/cpp/src/util/string_compare.cc b/cpp/src/util/string_compare.cc
index c63b138..31a7534 100644
--- a/cpp/src/util/string_compare.cc
+++ b/cpp/src/util/string_compare.cc
@@ -12,20 +12,54 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "re2ptr.h" // Must be the first #include statement!
-
#include "string_compare.h"
#include <libaddressinput/util/basictypes.h>
+#include <cassert>
#include <string>
+#include <re2/re2.h>
+
+#include "lru_cache_using_std.h"
+
+// RE2 uses type string, which is not necessarily the same as type std::string.
+// In order to create objects of the correct type, to be able to pass pointers
+// to these objects to RE2, the function that does that is defined inside an
+// unnamed namespace inside the re2 namespace. Oh, my ...
+namespace re2 {
+namespace {
+
+// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function
+// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would
+// be a case-insensitive match to the string. This is far too expensive to do
+// repeatedly, so the function is only ever called through an LRU cache.
+std::string ComputeMinPossibleMatch(const std::string& str) {
+ string min, max; // N.B.: RE2 type string!
+
+ RE2::Options options;
+ options.set_literal(true);
+ options.set_case_sensitive(false);
+ RE2 matcher(str, options);
+
+ bool success = matcher.PossibleMatchRange(&min, &max, str.size());
+ assert(success);
+ (void)success; // Prevent unused variable if assert() is optimized away.
+
+ return min;
+}
+
+} // namespace
+} // namespace re2
+
namespace i18n {
namespace addressinput {
class StringCompare::Impl {
+ enum { MAX_CACHE_SIZE = 1 << 15 };
+
public:
- Impl() {
+ Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) {
options_.set_literal(true);
options_.set_case_sensitive(false);
}
@@ -37,8 +71,15 @@ class StringCompare::Impl {
return RE2::FullMatch(a, matcher);
}
+ bool NaturalLess(const std::string& a, const std::string& b) const {
+ const std::string& min_a(min_possible_match_(a));
+ const std::string& min_b(min_possible_match_(b));
+ return min_a < min_b;
+ }
+
private:
RE2::Options options_;
+ mutable lru_cache_using_std<std::string, std::string> min_possible_match_;
DISALLOW_COPY_AND_ASSIGN(Impl);
};
@@ -52,5 +93,10 @@ bool StringCompare::NaturalEquals(const std::string& a,
return impl_->NaturalEquals(a, b);
}
+bool StringCompare::NaturalLess(const std::string& a,
+ const std::string& b) const {
+ return impl_->NaturalLess(a, b);
+}
+
} // namespace addressinput
} // namespace i18n
diff --git a/cpp/src/util/string_compare.h b/cpp/src/util/string_compare.h
index 9d530fa..ae680dd 100644
--- a/cpp/src/util/string_compare.h
+++ b/cpp/src/util/string_compare.h
@@ -33,6 +33,12 @@ class StringCompare {
// default implementation just does case insensitive string matching.
bool NaturalEquals(const std::string& a, const std::string& b) const;
+ // Comparison function for use with the STL analogous to NaturalEquals().
+ // Libaddressinput itself isn't really concerned about how this is done, as
+ // long as it conforms to the STL requirements on less<> predicates. This
+ // default implementation is VERY SLOW! Must be replaced if you need speed.
+ bool NaturalLess(const std::string& a, const std::string& b) const;
+
private:
class Impl;
scoped_ptr<Impl> impl_;
diff --git a/cpp/test/address_ui_test.cc b/cpp/test/address_ui_test.cc
index b36ea8c..c23d205 100644
--- a/cpp/test/address_ui_test.cc
+++ b/cpp/test/address_ui_test.cc
@@ -37,6 +37,8 @@ using i18n::addressinput::POSTAL_CODE;
using i18n::addressinput::RECIPIENT;
using i18n::addressinput::STREET_ADDRESS;
+static const char kUiLanguageTag[] = "en";
+
// Returns testing::AssertionSuccess if the |components| are valid. Uses
// |region_code| in test failure messages.
testing::AssertionResult ComponentsAreValid(
@@ -81,14 +83,15 @@ TEST_P(AddressUiTest, RegionCodeHasTwoCharacters) {
// code.
TEST_P(AddressUiTest, ComponentsAreValid) {
EXPECT_TRUE(ComponentsAreValid(BuildComponents(
- GetParam(), localization_, &best_address_language_tag_)));
+ GetParam(), localization_, kUiLanguageTag, &best_address_language_tag_)));
}
// Verifies that BuildComponents() returns at most one input field of each type.
TEST_P(AddressUiTest, UniqueFieldTypes) {
std::set<AddressField> fields;
const std::vector<AddressUiComponent>& components =
- BuildComponents(GetParam(), localization_, &best_address_language_tag_);
+ BuildComponents(GetParam(), localization_, kUiLanguageTag,
+ &best_address_language_tag_);
for (std::vector<AddressUiComponent>::const_iterator it = components.begin();
it != components.end(); ++it) {
EXPECT_TRUE(fields.insert(it->field).second);
@@ -103,8 +106,9 @@ INSTANTIATE_TEST_CASE_P(
// Verifies that BuildComponents() returns an empty vector for an invalid region
// code.
TEST_F(AddressUiTest, InvalidRegionCodeReturnsEmptyVector) {
- EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_,
- &best_address_language_tag_).empty());
+ EXPECT_TRUE(BuildComponents(
+ "INVALID-REGION-CODE", localization_, kUiLanguageTag,
+ &best_address_language_tag_).empty());
}
// Test data for determining the best language tag and whether the right format
@@ -145,9 +149,10 @@ class BestAddressLanguageTagTest
std::string GetterStub(int) { return std::string(); }
TEST_P(BestAddressLanguageTagTest, CorrectBestAddressLanguageTag) {
- localization_.SetGetter(&GetterStub, GetParam().ui_language_tag);
+ localization_.SetGetter(&GetterStub);
const std::vector<AddressUiComponent>& components = BuildComponents(
- GetParam().region_code, localization_, &best_address_language_tag_);
+ GetParam().region_code, localization_, GetParam().ui_language_tag,
+ &best_address_language_tag_);
EXPECT_EQ(GetParam().expected_best_address_language_tag,
best_address_language_tag_);
ASSERT_FALSE(components.empty());
diff --git a/cpp/test/address_validator_test.cc b/cpp/test/address_validator_test.cc
index 5250502..79195e3 100644
--- a/cpp/test/address_validator_test.cc
+++ b/cpp/test/address_validator_test.cc
@@ -332,4 +332,87 @@ TEST_P(AddressValidatorTest, ValidateClearsProblems) {
EXPECT_EQ(expected_, problems_);
}
+TEST_P(AddressValidatorTest, ValidKanjiAddressJP) {
+ address_.region_code = "JP";
+ address_.administrative_area =
+ "\xE5\xBE\xB3\xE5\xB3\xB6\xE7\x9C\x8C"; /* 徳島県 */
+ address_.locality =
+ "\xE5\xBE\xB3\xE5\xB3\xB6\xE5\xB8\x82"; /* 徳島市 */
+ address_.postal_code = "770-0847";
+ address_.address_line.push_back("...");
+ address_.language_code = "ja";
+
+ ASSERT_NO_FATAL_FAILURE(Validate());
+ ASSERT_TRUE(called_);
+ EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidLatinAddressJP) {
+ // Skip this test case when using the OndemandSupplier, which depends on the
+ // address metadata server to map Latin script names to local script names.
+ if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+ address_.region_code = "JP";
+ address_.administrative_area = "Tokushima";
+ address_.locality = "Tokushima";
+ address_.postal_code = "770-0847";
+ address_.address_line.push_back("...");
+ address_.language_code = "ja-Latn";
+
+ ASSERT_NO_FATAL_FAILURE(Validate());
+ ASSERT_TRUE(called_);
+ EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidAddressBR) {
+ // Skip this test case when using the OndemandSupplier, which depends on the
+ // address metadata server to map natural language names to metadata IDs.
+ if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+ address_.region_code = "BR";
+ address_.administrative_area = "S\xC3\xA3o Paulo"; /* São Paulo */
+ address_.locality = "Presidente Prudente";
+ address_.postal_code = "19063-008";
+ address_.address_line.push_back("Rodovia Raposo Tavares, 6388-6682");
+ address_.language_code = "pt";
+
+ ASSERT_NO_FATAL_FAILURE(Validate());
+ ASSERT_TRUE(called_);
+ EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidAddressCA_en) {
+ // Skip this test case when using the OndemandSupplier, which depends on the
+ // address metadata server to map natural language names to metadata IDs.
+ if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+ address_.region_code = "CA";
+ address_.administrative_area = "New Brunswick";
+ address_.locality = "Saint John County";
+ address_.postal_code = "E2L 4Z6";
+ address_.address_line.push_back("...");
+ address_.language_code = "en";
+
+ ASSERT_NO_FATAL_FAILURE(Validate());
+ ASSERT_TRUE(called_);
+ EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidAddressCA_fr) {
+ // Skip this test case when using the OndemandSupplier, which depends on the
+ // address metadata server to map natural language names to metadata IDs.
+ if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+ address_.region_code = "CA";
+ address_.administrative_area = "Nouveau-Brunswick";
+ address_.locality = "Comt\xC3\xA9 de Saint-Jean"; /* Comté de Saint-Jean */
+ address_.postal_code = "E2L 4Z6";
+ address_.address_line.push_back("...");
+ address_.language_code = "fr";
+
+ ASSERT_NO_FATAL_FAILURE(Validate());
+ ASSERT_TRUE(called_);
+ EXPECT_EQ(expected_, problems_);
+}
+
} // namespace
diff --git a/cpp/test/localization_test.cc b/cpp/test/localization_test.cc
index d1c4a9d..2ed4b07 100644
--- a/cpp/test/localization_test.cc
+++ b/cpp/test/localization_test.cc
@@ -54,12 +54,10 @@ class LocalizationTest : public testing::TestWithParam<int> {
// Verifies that a custom message getter can be used.
static const char kValidMessage[] = "Data";
-static const char kValidLanguageTag[] = "tlh";
std::string GetValidMessage(int message_id) { return kValidMessage; }
TEST_P(LocalizationTest, ValidStringGetterCanBeUsed) {
- localization_.SetGetter(&GetValidMessage, kValidLanguageTag);
+ localization_.SetGetter(&GetValidMessage);
EXPECT_EQ(kValidMessage, localization_.GetString(GetParam()));
- EXPECT_EQ(kValidLanguageTag, localization_.GetLanguage());
}
// Verifies that the default language for messages does not have empty strings.
@@ -78,13 +76,6 @@ TEST_P(LocalizationTest, NoDoubleSpace) {
localization_.GetString(GetParam()).find(std::string(2U, ' ')));
}
-// Verifies that the default string is English.
-TEST_P(LocalizationTest, DefaultStringIsEnglish) {
- std::string default_string = localization_.GetString(GetParam());
- localization_.SetLanguage("en");
- EXPECT_EQ(default_string, localization_.GetString(GetParam()));
-}
-
// Tests all message identifiers.
INSTANTIATE_TEST_CASE_P(
AllMessages, LocalizationTest,
@@ -130,24 +121,17 @@ TEST_F(LocalizationTest, InvalidMessageIsEmptyString) {
EXPECT_TRUE(localization_.GetString(INVALID_MESSAGE_ID).empty());
}
-// Verifies that the default language is English.
-TEST_F(LocalizationTest, DefaultLanguageIsEnglish) {
- EXPECT_EQ("en", localization_.GetLanguage());
-}
-
TEST(LocalizationGetErrorMessageTest, MissingRequiredPostalCode) {
Localization localization;
AddressData address;
address.region_code = "CH";
- EXPECT_EQ(std::string("You must provide a postal code, for example") +
- " 2544,1211,1556,3030." +
+ EXPECT_EQ(std::string("You must provide a postal code, for example 2544.") +
" Don't know your postal code? Find it out" +
" <a href=\"http://www.post.ch/db/owa/pv_plz_pack/pr_main\">" +
"here</a>.",
localization.GetErrorMessage(address, POSTAL_CODE,
MISSING_REQUIRED_FIELD, true, true));
- EXPECT_EQ(std::string("You must provide a postal code, for example") +
- " 2544,1211,1556,3030.",
+ EXPECT_EQ("You must provide a postal code, for example 2544.",
localization.GetErrorMessage(address, POSTAL_CODE,
MISSING_REQUIRED_FIELD, true, false));
EXPECT_EQ("You can't leave this empty.",
@@ -162,15 +146,13 @@ TEST(LocalizationGetErrorMessageTest, MissingRequiredZipCode) {
Localization localization;
AddressData address;
address.region_code = "US";
- EXPECT_EQ(std::string("You must provide a ZIP code, for example") +
- " 95014,22162-1010." +
+ EXPECT_EQ(std::string("You must provide a ZIP code, for example 95014.") +
" Don't know your ZIP code? Find it out" +
" <a href=\"https://tools.usps.com/go/ZipLookupAction!" +
"input.action\">here</a>.",
localization.GetErrorMessage(address, POSTAL_CODE,
MISSING_REQUIRED_FIELD, true, true));
- EXPECT_EQ(std::string("You must provide a ZIP code, for example") +
- " 95014,22162-1010.",
+ EXPECT_EQ("You must provide a ZIP code, for example 95014.",
localization.GetErrorMessage(address, POSTAL_CODE,
MISSING_REQUIRED_FIELD, true, false));
EXPECT_EQ("You can't leave this empty.",
@@ -342,16 +324,14 @@ TEST(LocalizationGetErrorMessageTest, InvalidFormatPostalCode) {
AddressData address;
address.region_code = "CH";
EXPECT_EQ(std::string("This postal code format is not recognized. Example ") +
- "of a valid postal code:" +
- " 2544,1211,1556,3030." +
+ "of a valid postal code: 2544." +
" Don't know your postal code? Find it out" +
" <a href=\"http://www.post.ch/db/owa/pv_plz_pack/pr_main\">" +
"here</a>.",
localization.GetErrorMessage(address, POSTAL_CODE,
INVALID_FORMAT, true, true));
EXPECT_EQ(std::string("This postal code format is not recognized. Example ") +
- "of a valid postal code:" +
- " 2544,1211,1556,3030.",
+ "of a valid postal code: 2544.",
localization.GetErrorMessage(address, POSTAL_CODE,
INVALID_FORMAT, true, false));
EXPECT_EQ("This postal code format is not recognized.",
@@ -367,16 +347,14 @@ TEST(LocalizationGetErrorMessageTest, InvalidFormatZipCode) {
AddressData address;
address.region_code = "US";
EXPECT_EQ(std::string("This ZIP code format is not recognized. Example of ") +
- "a valid ZIP code:" +
- " 95014,22162-1010." +
+ "a valid ZIP code: 95014." +
" Don't know your ZIP code? Find it out" +
" <a href=\"https://tools.usps.com/go/ZipLookupAction!" +
"input.action\">here</a>.",
localization.GetErrorMessage(address, POSTAL_CODE,
INVALID_FORMAT, true, true));
EXPECT_EQ(std::string("This ZIP code format is not recognized. Example of ") +
- "a valid ZIP code:" +
- " 95014,22162-1010.",
+ "a valid ZIP code: 95014.",
localization.GetErrorMessage(address, POSTAL_CODE,
INVALID_FORMAT, true, false));
EXPECT_EQ("This ZIP code format is not recognized.",
diff --git a/cpp/test/util/string_compare_test.cc b/cpp/test/util/string_compare_test.cc
index d5990d9..8f8d4d5 100644
--- a/cpp/test/util/string_compare_test.cc
+++ b/cpp/test/util/string_compare_test.cc
@@ -25,14 +25,19 @@ using i18n::addressinput::StringCompare;
struct TestCase {
TestCase(const std::string& left,
const std::string& right,
- bool should_be_equal)
- : left(left), right(right), should_be_equal(should_be_equal) {}
+ bool should_be_equal,
+ bool should_be_less)
+ : left(left),
+ right(right),
+ should_be_equal(should_be_equal),
+ should_be_less(should_be_less) {}
~TestCase() {}
std::string left;
std::string right;
bool should_be_equal;
+ bool should_be_less;
};
class StringCompareTest : public testing::TestWithParam<TestCase> {
@@ -48,13 +53,25 @@ TEST_P(StringCompareTest, CorrectComparison) {
}
}
+TEST_P(StringCompareTest, CorrectLess) {
+ if (GetParam().should_be_less) {
+ EXPECT_TRUE(compare_.NaturalLess(GetParam().left, GetParam().right));
+ } else {
+ EXPECT_FALSE(compare_.NaturalLess(GetParam().left, GetParam().right));
+ }
+}
+
INSTANTIATE_TEST_CASE_P(
Comparisons, StringCompareTest,
- testing::Values(TestCase("foo", "foo", true),
- TestCase("foo", "FOO", true),
- TestCase("bar", "foo", false),
- TestCase("강원도", "강원도", true),
- TestCase("강원도", "대구광역시", false),
- TestCase("ZÜRICH", "zürich", true)));
+ testing::Values(TestCase("foo", "foo", true, false),
+ TestCase("foo", "FOO", true, false),
+ TestCase("bar", "foo", false, true),
+ TestCase("강원도", "강원도", true, false),
+ TestCase("강원도", "대구광역시", false, true),
+ TestCase("ZÜRICH", "zürich", true, false),
+ TestCase("абв", "где", false, true),
+ TestCase("абв", "ГДЕ", false, true),
+ TestCase("где", "абв", false, false),
+ TestCase("где", "АБВ", false, false)));
} // namespace
diff --git a/cpp/test/validation_task_test.cc b/cpp/test/validation_task_test.cc
index e8136f3..011a978 100644
--- a/cpp/test/validation_task_test.cc
+++ b/cpp/test/validation_task_test.cc
@@ -98,14 +98,14 @@ class ValidationTaskTest : public testing::Test {
&problems_,
*validated_);
- Supplier::RuleHierarchy* hierarchy = new Supplier::RuleHierarchy();
+ Supplier::RuleHierarchy hierarchy;
for (size_t i = 0; i < arraysize(json_) && json_[i] != NULL; ++i) {
ASSERT_TRUE(rule[i].ParseSerializedRule(json_[i]));
- hierarchy->rule[i] = &rule[i];
+ hierarchy.rule[i] = &rule[i];
}
- (*task->supplied_)(success_, *task->lookup_key_, *hierarchy);
+ (*task->supplied_)(success_, *task->lookup_key_, hierarchy);
}
const char* json_[arraysize(LookupKey::kHierarchy)];