diff options
-rw-r--r-- | cpp/include/libaddressinput/address_ui.h | 3 | ||||
-rw-r--r-- | cpp/include/libaddressinput/localization.h | 27 | ||||
-rw-r--r-- | cpp/include/libaddressinput/preload_supplier.h | 6 | ||||
-rw-r--r-- | cpp/src/address_ui.cc | 5 | ||||
-rw-r--r-- | cpp/src/localization.cc | 36 | ||||
-rw-r--r-- | cpp/src/preload_supplier.cc | 145 | ||||
-rw-r--r-- | cpp/src/region_data_constants.cc | 7 | ||||
-rw-r--r-- | cpp/src/util/lru_cache_using_std.h | 168 | ||||
-rw-r--r-- | cpp/src/util/string_compare.cc | 52 | ||||
-rw-r--r-- | cpp/src/util/string_compare.h | 6 | ||||
-rw-r--r-- | cpp/test/address_ui_test.cc | 17 | ||||
-rw-r--r-- | cpp/test/address_validator_test.cc | 83 | ||||
-rw-r--r-- | cpp/test/localization_test.cc | 40 | ||||
-rw-r--r-- | cpp/test/util/string_compare_test.cc | 33 | ||||
-rw-r--r-- | cpp/test/validation_task_test.cc | 6 |
15 files changed, 513 insertions, 121 deletions
diff --git a/cpp/include/libaddressinput/address_ui.h b/cpp/include/libaddressinput/address_ui.h index 8939501..cc39f6a 100644 --- a/cpp/include/libaddressinput/address_ui.h +++ b/cpp/include/libaddressinput/address_ui.h @@ -29,7 +29,7 @@ const std::vector<std::string>& GetRegionCodes(); // Returns the UI components for the CLDR |region_code|. Uses the strings from // |localization|. The components can be in default or Latin order, depending on -// the language of |localization|. +// the BCP 47 |ui_language_tag|. // // Sets the |best_address_language_tag| to the BCP 47 language tag that should // be saved with this address. This language will be used to get drop-downs to @@ -40,6 +40,7 @@ const std::vector<std::string>& GetRegionCodes(); std::vector<AddressUiComponent> BuildComponents( const std::string& region_code, const Localization& localization, + const std::string& ui_language_tag, std::string* best_address_language_tag); } // namespace addressinput diff --git a/cpp/include/libaddressinput/localization.h b/cpp/include/libaddressinput/localization.h index 08b5964..acfdf7e 100644 --- a/cpp/include/libaddressinput/localization.h +++ b/cpp/include/libaddressinput/localization.h @@ -26,17 +26,17 @@ namespace addressinput { struct AddressData; -// The object to retrieve localized strings based on message IDs. Sample usage: +// The object to retrieve localized strings based on message IDs. It returns +// English by default. Sample usage: // Localization localization; -// localization.SetLanguage("en"); // std::string best_language_tag; -// Process(BuildComponents("CA", localization, &best_language_tag)); +// Process(BuildComponents("CA", localization, "en-US", &best_language_tag)); // // Alternative usage: // Localization localization; -// localization.SetGetter(&MyStringGetter, "fr"); +// localization.SetGetter(&MyStringGetter); // std::string best_language_tag; -// Process(BuildComponents("CA", localization, &best_language_tag)); +// Process(BuildComponents("CA", localization, "fr-CA", &best_language_tag)); class Localization { public: // Initializes with English messages by default. @@ -63,17 +63,11 @@ class Localization { bool enable_examples, bool enable_links) const; - // Sets the language for the strings. The only supported language is "en" - // until we have translations. - void SetLanguage(const std::string& language_tag); - // Sets the string getter that takes a message identifier and returns the - // corresponding localized string. The |language_tag| parameter is used only - // for information purposes here. - void SetGetter(std::string (*getter)(int), const std::string& language_tag); - - // Returns the current language tag. - const std::string& GetLanguage() const { return language_tag_; } + // corresponding localized string. For example, in Chromium there is + // l10n_util::GetStringUTF8 which always returns strings in the current + // application locale. + void SetGetter(std::string (*getter)(int)); private: // Returns the error message where the address field is a postal code. Helper @@ -90,9 +84,6 @@ class Localization { // The string getter. std::string (*get_string_)(int); - - // The current language tag. - std::string language_tag_; }; } // namespace addressinput diff --git a/cpp/include/libaddressinput/preload_supplier.h b/cpp/include/libaddressinput/preload_supplier.h index f7654ab..740b4d0 100644 --- a/cpp/include/libaddressinput/preload_supplier.h +++ b/cpp/include/libaddressinput/preload_supplier.h @@ -20,14 +20,15 @@ #include <libaddressinput/util/basictypes.h> #include <libaddressinput/util/scoped_ptr.h> -#include <map> #include <set> #include <string> +#include <vector> namespace i18n { namespace addressinput { class Downloader; +class IndexMap; class LookupKey; class Retriever; class Rule; @@ -90,7 +91,8 @@ class PreloadSupplier : public Supplier { const scoped_ptr<const Retriever> retriever_; std::set<std::string> pending_; - std::map<std::string, const Rule*> rule_cache_; + const scoped_ptr<IndexMap> rule_index_; + std::vector<const Rule*> rule_storage_; DISALLOW_COPY_AND_ASSIGN(PreloadSupplier); }; diff --git a/cpp/src/address_ui.cc b/cpp/src/address_ui.cc index d515765..a099335 100644 --- a/cpp/src/address_ui.cc +++ b/cpp/src/address_ui.cc @@ -82,6 +82,7 @@ const std::vector<std::string>& GetRegionCodes() { std::vector<AddressUiComponent> BuildComponents( const std::string& region_code, const Localization& localization, + const std::string& ui_language_tag, std::string* best_address_language_tag) { assert(best_address_language_tag != NULL); std::vector<AddressUiComponent> result; @@ -93,8 +94,8 @@ std::vector<AddressUiComponent> BuildComponents( return result; } - const Language& best_address_language = ChooseBestAddressLanguage( - rule, Language(localization.GetLanguage())); + const Language& best_address_language = + ChooseBestAddressLanguage(rule, Language(ui_language_tag)); *best_address_language_tag = best_address_language.tag; const std::vector<FormatElement>& format = diff --git a/cpp/src/localization.cc b/cpp/src/localization.cc index 951f1a7..9bb32a5 100644 --- a/cpp/src/localization.cc +++ b/cpp/src/localization.cc @@ -25,6 +25,7 @@ #include "grit.h" #include "region_data_constants.h" #include "rule.h" +#include "util/string_split.h" #include "util/string_util.h" namespace { @@ -42,28 +43,16 @@ namespace addressinput { namespace { -static const char kDefaultLanguage[] = "en"; - -// For each language XX with translations: -// (1) Add a namespace XX here with an include of "XX_messages.cc". -// (2) Add a wrapper that converts the char pointer to std::string. (GRIT -// generated functions return char pointers.) -// (2) Use the XX::GetStdString in the SetLanguage() method below. -namespace en { - #include "en_messages.cc" -std::string GetStdString(int message_id) { +std::string GetEnglishString(int message_id) { const char* str = GetString(message_id); return str != NULL ? std::string(str) : std::string(); } -} // namespace en - } // namespace -Localization::Localization() : get_string_(&en::GetStdString), - language_tag_(kDefaultLanguage) {} +Localization::Localization() : get_string_(&GetEnglishString) {} Localization::~Localization() {} @@ -83,7 +72,11 @@ std::string Localization::GetErrorMessage(const AddressData& address, if (rule.ParseSerializedRule( RegionDataConstants::GetRegionData(address.region_code))) { if (enable_examples) { - postal_code_example = rule.GetPostalCodeExample(); + std::vector<std::string> examples_list; + SplitString(rule.GetPostalCodeExample(), ',', &examples_list); + if (!examples_list.empty()) { + postal_code_example = examples_list.front(); + } } if (enable_links) { post_service_url = rule.GetPostServiceUrl(); @@ -124,20 +117,9 @@ std::string Localization::GetErrorMessage(const AddressData& address, } } -void Localization::SetLanguage(const std::string& language_tag) { - if (language_tag == kDefaultLanguage) { - get_string_ = &en::GetStdString; - } else { - assert(false); - } - language_tag_ = language_tag; -} - -void Localization::SetGetter(std::string (*getter)(int), - const std::string& language_tag) { +void Localization::SetGetter(std::string (*getter)(int)) { assert(getter != NULL); get_string_ = getter; - language_tag_ = language_tag; } std::string Localization::GetErrorMessageForPostalCode( diff --git a/cpp/src/preload_supplier.cc b/cpp/src/preload_supplier.cc index 5219e9a..31325bd 100644 --- a/cpp/src/preload_supplier.cc +++ b/cpp/src/preload_supplier.cc @@ -21,10 +21,13 @@ #include <libaddressinput/util/basictypes.h> #include <libaddressinput/util/scoped_ptr.h> +#include <algorithm> #include <cassert> #include <cstddef> +#include <functional> #include <map> #include <set> +#include <stack> #include <string> #include <utility> #include <vector> @@ -35,12 +38,36 @@ #include "retriever.h" #include "rule.h" #include "util/json.h" +#include "util/string_compare.h" namespace i18n { namespace addressinput { namespace { +// STL predicate less<> that uses StringCompare to match strings that a human +// reader would consider to be "the same". The default implementation just does +// case insensitive string comparison, but StringCompare can be overriden with +// more sophisticated implementations. +class IndexLess : public std::binary_function<std::string, std::string, bool> { + public: + result_type operator()(const first_argument_type& a, + const second_argument_type& b) const { + return kStringCompare.NaturalLess(a, b); + } + + private: + static const StringCompare kStringCompare; +}; + +const StringCompare IndexLess::kStringCompare; + +} // namespace + +class IndexMap : public std::map<std::string, const Rule*, IndexLess> {}; + +namespace { + class Helper { public: // Does not take ownership of its parameters. @@ -49,14 +76,17 @@ class Helper { const PreloadSupplier::Callback& loaded, const Retriever& retriever, std::set<std::string>* pending, - std::map<std::string, const Rule*>* rule_cache) + IndexMap* rule_index, + std::vector<const Rule*>* rule_storage) : region_code_(region_code), loaded_(loaded), pending_(pending), - rule_cache_(rule_cache), + rule_index_(rule_index), + rule_storage_(rule_storage), retrieved_(BuildCallback(this, &Helper::OnRetrieved)) { assert(pending_ != NULL); - assert(rule_cache_ != NULL); + assert(rule_index_ != NULL); + assert(rule_storage_ != NULL); assert(retrieved_ != NULL); pending_->insert(key); retriever.Retrieve(key, *retrieved_); @@ -75,6 +105,7 @@ class Helper { (void)status; // Prevent unused variable if assert() is optimized away. Json json; + std::vector<const Rule*> sub_rules; if (!success) { goto callback; @@ -112,13 +143,95 @@ class Helper { rule->ParseJsonRule(value); assert(id == rule->GetId()); // Sanity check. - std::pair<std::map<std::string, const Rule*>::iterator, bool> result = - rule_cache_->insert(std::make_pair(rule->GetId(), rule)); + rule_storage_->push_back(rule); + if (depth > 0) { + sub_rules.push_back(rule); + } + + // Add the ID of this Rule object to the rule index. + std::pair<IndexMap::iterator, bool> result = + rule_index_->insert(std::make_pair(id, rule)); assert(result.second); (void)result; // Prevent unused variable if assert() is optimized away. + ++rule_count; } + /* + * Normally the address metadata server takes care of mapping from natural + * language names to metadata IDs (eg. "São Paulo" -> "SP") and from Latin + * script names to local script names (eg. "Tokushima" -> "徳島県"). + * + * As the PreloadSupplier doesn't contact the metadata server upon each + * Supply() request, it instead has an internal lookup table (rule_index_) + * that contains such mappings. + * + * This lookup table is populated by iterating over all sub rules and for + * each of them construct ID strings using human readable names (eg. "São + * Paulo") and using Latin script names (eg. "Tokushima"). + */ + for (std::vector<const Rule*>::const_iterator + it = sub_rules.begin(); it != sub_rules.end(); ++it) { + std::stack<const Rule*> hierarchy; + hierarchy.push(*it); + + // Push pointers to all parent Rule objects onto the hierarchy stack. + for (std::string parent_id((*it)->GetId());;) { + // Strip the last part of parent_id. Break if COUNTRY level is reached. + std::string::size_type pos = parent_id.rfind('/'); + if (pos == sizeof "data/ZZ" - 1) { + break; + } + parent_id.resize(pos); + + IndexMap::const_iterator jt = rule_index_->find(parent_id); + assert(jt != rule_index_->end()); + hierarchy.push(jt->second); + } + + std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1)); + std::string latin_id(human_id); + + // Append the names from all Rule objects on the hierarchy stack. + for (; !hierarchy.empty(); hierarchy.pop()) { + const Rule* rule = hierarchy.top(); + + human_id.push_back('/'); + if (!rule->GetName().empty()) { + human_id.append(rule->GetName()); + } else { + // If the "name" field is empty, the name is the last part of the ID. + const std::string& id = rule->GetId(); + std::string::size_type pos = id.rfind('/'); + assert(pos != std::string::npos); + human_id.append(id.substr(pos + 1)); + } + + if (!rule->GetLatinName().empty()) { + latin_id.push_back('/'); + latin_id.append(rule->GetLatinName()); + } + } + + // If the ID has a language tag, copy it. + { + const std::string& id = (*it)->GetId(); + std::string::size_type pos = id.rfind("--"); + if (pos != std::string::npos) { + human_id.append(id, pos, id.size() - pos); + } + } + + rule_index_->insert(std::make_pair(human_id, *it)); + + // Add the Latin script ID, if a Latin script name could be found for + // every part of the ID. + if (std::count(human_id.begin(), human_id.end(), '/') == + std::count(latin_id.begin(), latin_id.end(), '/')) { + rule_index_->insert(std::make_pair(latin_id, *it)); + } + } + callback: loaded_(success, region_code_, rule_count); delete this; @@ -127,7 +240,8 @@ class Helper { const std::string region_code_; const PreloadSupplier::Callback& loaded_; std::set<std::string>* const pending_; - std::map<std::string, const Rule*>* const rule_cache_; + IndexMap* const rule_index_; + std::vector<const Rule*>* const rule_storage_; const scoped_ptr<const Retriever::Callback> retrieved_; DISALLOW_COPY_AND_ASSIGN(Helper); @@ -148,12 +262,13 @@ PreloadSupplier::PreloadSupplier(const std::string& validation_data_url, Storage* storage) : retriever_(new Retriever(validation_data_url, downloader, storage)), pending_(), - rule_cache_() {} + rule_index_(new IndexMap), + rule_storage_() {} PreloadSupplier::~PreloadSupplier() { - for (std::map<std::string, const Rule*>::const_iterator - it = rule_cache_.begin(); it != rule_cache_.end(); ++it) { - delete it->second; + for (std::vector<const Rule*>::const_iterator + it = rule_storage_.begin(); it != rule_storage_.end(); ++it) { + delete *it; } } @@ -192,7 +307,8 @@ void PreloadSupplier::LoadRules(const std::string& region_code, loaded, *retriever_, &pending_, - &rule_cache_); + rule_index_.get(), + &rule_storage_); } bool PreloadSupplier::IsLoaded(const std::string& region_code) const { @@ -214,9 +330,8 @@ bool PreloadSupplier::GetRuleHierarchy(const LookupKey& lookup_key, for (size_t depth = 0; depth <= max_depth; ++depth) { const std::string& key = lookup_key.ToKeyString(depth); - std::map<std::string, const Rule*>::const_iterator it = - rule_cache_.find(key); - if (it == rule_cache_.end()) { + IndexMap::const_iterator it = rule_index_->find(key); + if (it == rule_index_->end()) { return depth > 0; // No data on COUNTRY level is failure. } hierarchy->rule[depth] = it->second; @@ -227,7 +342,7 @@ bool PreloadSupplier::GetRuleHierarchy(const LookupKey& lookup_key, } bool PreloadSupplier::IsLoadedKey(const std::string& key) const { - return rule_cache_.find(key) != rule_cache_.end(); + return rule_index_->find(key) != rule_index_->end(); } bool PreloadSupplier::IsPendingKey(const std::string& key) const { diff --git a/cpp/src/region_data_constants.cc b/cpp/src/region_data_constants.cc index f3d9c3c..cabee28 100644 --- a/cpp/src/region_data_constants.cc +++ b/cpp/src/region_data_constants.cc @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // -// The data in this file will be automatically generated. For now, the data -// comes from: -// -// https://code.google.com/p/libaddressinput/source/browse/trunk/java/src/com/android/i18n/addressinput/RegionDataConstants.java?r=137 +// The data in this file is automatically generated. #include "region_data_constants.h" @@ -1263,7 +1260,7 @@ std::map<std::string, std::string> InitRegionData() { "\"languages\":\"en\"" "}")); region_data.insert(std::make_pair("US", "{" - "\"fmt\":\"%N%n%O%n%A%n%C %S %Z\"," + "\"fmt\":\"%N%n%O%n%A%n%C, %S %Z\"," "\"require\":\"ACSZ\"," "\"zip_name_type\":\"zip\"," "\"state_name_type\":\"state\"," diff --git a/cpp/src/util/lru_cache_using_std.h b/cpp/src/util/lru_cache_using_std.h new file mode 100644 index 0000000..25aced7 --- /dev/null +++ b/cpp/src/util/lru_cache_using_std.h @@ -0,0 +1,168 @@ +/******************************************************************************/ +/* Copyright (c) 2010-2011, Tim Day <timday@timday.com> */ +/* */ +/* Permission to use, copy, modify, and/or distribute this software for any */ +/* purpose with or without fee is hereby granted, provided that the above */ +/* copyright notice and this permission notice appear in all copies. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES */ +/* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR */ +/* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ +/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN */ +/* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF */ +/* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +/******************************************************************************/ + +// The original source code is from: +// https://bitbucket.org/timday/lru_cache/src/497822a492a8/include/lru_cache_using_std.h + +#ifndef I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_ +#define I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_ + +#include <cassert> +#include <list> +#include <map> + +// Class providing fixed-size (by number of records) +// LRU-replacement cache of a function with signature +// V f(K). +// The default comparator/hash/allocator will be used. +template < + typename K, + typename V + > class lru_cache_using_std +{ +public: + + typedef K key_type; + typedef V value_type; + + // Key access history, most recent at back + typedef std::list<key_type> key_tracker_type; + + // Key to value and key history iterator + typedef std::map< + key_type, + std::pair< + value_type, + typename key_tracker_type::iterator + > + > key_to_value_type; + + // Constuctor specifies the cached function and + // the maximum number of records to be stored + lru_cache_using_std( + value_type (*f)(const key_type&), + size_t c + ) + :_fn(f) + ,_capacity(c) + { + assert(_capacity!=0); + } + + // Obtain value of the cached function for k + value_type operator()(const key_type& k) { + + // Attempt to find existing record + const typename key_to_value_type::iterator it + =_key_to_value.find(k); + + if (it==_key_to_value.end()) { + + // We don't have it: + + // Evaluate function and create new record + const value_type v=_fn(k); + insert(k,v); + + // Return the freshly computed value + return v; + + } else { + + // We do have it: + + // Update access record by moving + // accessed key to back of list + _key_tracker.splice( + _key_tracker.end(), + _key_tracker, + (*it).second.second + ); + + // Return the retrieved value + return (*it).second.first; + } + } + + // Obtain the cached keys, most recently used element + // at head, least recently used at tail. + // This method is provided purely to support testing. + template <typename IT> void get_keys(IT dst) const { + typename key_tracker_type::const_reverse_iterator src + =_key_tracker.rbegin(); + while (src!=_key_tracker.rend()) { + *dst++ = *src++; + } + } + +private: + + // Record a fresh key-value pair in the cache + void insert(const key_type& k,const value_type& v) { + + // Method is only called on cache misses + assert(_key_to_value.find(k)==_key_to_value.end()); + + // Make space if necessary + if (_key_to_value.size()==_capacity) + evict(); + + // Record k as most-recently-used key + typename key_tracker_type::iterator it + =_key_tracker.insert(_key_tracker.end(),k); + + // Create the key-value entry, + // linked to the usage record. + _key_to_value.insert( + std::make_pair( + k, + std::make_pair(v,it) + ) + ); + // No need to check return, + // given previous assert. + } + + // Purge the least-recently-used element in the cache + void evict() { + + // Assert method is never called when cache is empty + assert(!_key_tracker.empty()); + + // Identify least recently used key + const typename key_to_value_type::iterator it + =_key_to_value.find(_key_tracker.front()); + assert(it!=_key_to_value.end()); + + // Erase both elements to completely purge record + _key_to_value.erase(it); + _key_tracker.pop_front(); + } + + // The function to be cached + value_type (*_fn)(const key_type&); + + // Maximum number of key-value pairs to be retained + const size_t _capacity; + + // Key access history + key_tracker_type _key_tracker; + + // Key-to-value lookup + key_to_value_type _key_to_value; +}; + +#endif // I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_ diff --git a/cpp/src/util/string_compare.cc b/cpp/src/util/string_compare.cc index c63b138..31a7534 100644 --- a/cpp/src/util/string_compare.cc +++ b/cpp/src/util/string_compare.cc @@ -12,20 +12,54 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "re2ptr.h" // Must be the first #include statement! - #include "string_compare.h" #include <libaddressinput/util/basictypes.h> +#include <cassert> #include <string> +#include <re2/re2.h> + +#include "lru_cache_using_std.h" + +// RE2 uses type string, which is not necessarily the same as type std::string. +// In order to create objects of the correct type, to be able to pass pointers +// to these objects to RE2, the function that does that is defined inside an +// unnamed namespace inside the re2 namespace. Oh, my ... +namespace re2 { +namespace { + +// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function +// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would +// be a case-insensitive match to the string. This is far too expensive to do +// repeatedly, so the function is only ever called through an LRU cache. +std::string ComputeMinPossibleMatch(const std::string& str) { + string min, max; // N.B.: RE2 type string! + + RE2::Options options; + options.set_literal(true); + options.set_case_sensitive(false); + RE2 matcher(str, options); + + bool success = matcher.PossibleMatchRange(&min, &max, str.size()); + assert(success); + (void)success; // Prevent unused variable if assert() is optimized away. + + return min; +} + +} // namespace +} // namespace re2 + namespace i18n { namespace addressinput { class StringCompare::Impl { + enum { MAX_CACHE_SIZE = 1 << 15 }; + public: - Impl() { + Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) { options_.set_literal(true); options_.set_case_sensitive(false); } @@ -37,8 +71,15 @@ class StringCompare::Impl { return RE2::FullMatch(a, matcher); } + bool NaturalLess(const std::string& a, const std::string& b) const { + const std::string& min_a(min_possible_match_(a)); + const std::string& min_b(min_possible_match_(b)); + return min_a < min_b; + } + private: RE2::Options options_; + mutable lru_cache_using_std<std::string, std::string> min_possible_match_; DISALLOW_COPY_AND_ASSIGN(Impl); }; @@ -52,5 +93,10 @@ bool StringCompare::NaturalEquals(const std::string& a, return impl_->NaturalEquals(a, b); } +bool StringCompare::NaturalLess(const std::string& a, + const std::string& b) const { + return impl_->NaturalLess(a, b); +} + } // namespace addressinput } // namespace i18n diff --git a/cpp/src/util/string_compare.h b/cpp/src/util/string_compare.h index 9d530fa..ae680dd 100644 --- a/cpp/src/util/string_compare.h +++ b/cpp/src/util/string_compare.h @@ -33,6 +33,12 @@ class StringCompare { // default implementation just does case insensitive string matching. bool NaturalEquals(const std::string& a, const std::string& b) const; + // Comparison function for use with the STL analogous to NaturalEquals(). + // Libaddressinput itself isn't really concerned about how this is done, as + // long as it conforms to the STL requirements on less<> predicates. This + // default implementation is VERY SLOW! Must be replaced if you need speed. + bool NaturalLess(const std::string& a, const std::string& b) const; + private: class Impl; scoped_ptr<Impl> impl_; diff --git a/cpp/test/address_ui_test.cc b/cpp/test/address_ui_test.cc index b36ea8c..c23d205 100644 --- a/cpp/test/address_ui_test.cc +++ b/cpp/test/address_ui_test.cc @@ -37,6 +37,8 @@ using i18n::addressinput::POSTAL_CODE; using i18n::addressinput::RECIPIENT; using i18n::addressinput::STREET_ADDRESS; +static const char kUiLanguageTag[] = "en"; + // Returns testing::AssertionSuccess if the |components| are valid. Uses // |region_code| in test failure messages. testing::AssertionResult ComponentsAreValid( @@ -81,14 +83,15 @@ TEST_P(AddressUiTest, RegionCodeHasTwoCharacters) { // code. TEST_P(AddressUiTest, ComponentsAreValid) { EXPECT_TRUE(ComponentsAreValid(BuildComponents( - GetParam(), localization_, &best_address_language_tag_))); + GetParam(), localization_, kUiLanguageTag, &best_address_language_tag_))); } // Verifies that BuildComponents() returns at most one input field of each type. TEST_P(AddressUiTest, UniqueFieldTypes) { std::set<AddressField> fields; const std::vector<AddressUiComponent>& components = - BuildComponents(GetParam(), localization_, &best_address_language_tag_); + BuildComponents(GetParam(), localization_, kUiLanguageTag, + &best_address_language_tag_); for (std::vector<AddressUiComponent>::const_iterator it = components.begin(); it != components.end(); ++it) { EXPECT_TRUE(fields.insert(it->field).second); @@ -103,8 +106,9 @@ INSTANTIATE_TEST_CASE_P( // Verifies that BuildComponents() returns an empty vector for an invalid region // code. TEST_F(AddressUiTest, InvalidRegionCodeReturnsEmptyVector) { - EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_, - &best_address_language_tag_).empty()); + EXPECT_TRUE(BuildComponents( + "INVALID-REGION-CODE", localization_, kUiLanguageTag, + &best_address_language_tag_).empty()); } // Test data for determining the best language tag and whether the right format @@ -145,9 +149,10 @@ class BestAddressLanguageTagTest std::string GetterStub(int) { return std::string(); } TEST_P(BestAddressLanguageTagTest, CorrectBestAddressLanguageTag) { - localization_.SetGetter(&GetterStub, GetParam().ui_language_tag); + localization_.SetGetter(&GetterStub); const std::vector<AddressUiComponent>& components = BuildComponents( - GetParam().region_code, localization_, &best_address_language_tag_); + GetParam().region_code, localization_, GetParam().ui_language_tag, + &best_address_language_tag_); EXPECT_EQ(GetParam().expected_best_address_language_tag, best_address_language_tag_); ASSERT_FALSE(components.empty()); diff --git a/cpp/test/address_validator_test.cc b/cpp/test/address_validator_test.cc index 5250502..79195e3 100644 --- a/cpp/test/address_validator_test.cc +++ b/cpp/test/address_validator_test.cc @@ -332,4 +332,87 @@ TEST_P(AddressValidatorTest, ValidateClearsProblems) { EXPECT_EQ(expected_, problems_); } +TEST_P(AddressValidatorTest, ValidKanjiAddressJP) { + address_.region_code = "JP"; + address_.administrative_area = + "\xE5\xBE\xB3\xE5\xB3\xB6\xE7\x9C\x8C"; /* 徳島県 */ + address_.locality = + "\xE5\xBE\xB3\xE5\xB3\xB6\xE5\xB8\x82"; /* 徳島市 */ + address_.postal_code = "770-0847"; + address_.address_line.push_back("..."); + address_.language_code = "ja"; + + ASSERT_NO_FATAL_FAILURE(Validate()); + ASSERT_TRUE(called_); + EXPECT_EQ(expected_, problems_); +} + +TEST_P(AddressValidatorTest, ValidLatinAddressJP) { + // Skip this test case when using the OndemandSupplier, which depends on the + // address metadata server to map Latin script names to local script names. + if (GetParam() == &OndemandValidatorWrapper::Build) return; + + address_.region_code = "JP"; + address_.administrative_area = "Tokushima"; + address_.locality = "Tokushima"; + address_.postal_code = "770-0847"; + address_.address_line.push_back("..."); + address_.language_code = "ja-Latn"; + + ASSERT_NO_FATAL_FAILURE(Validate()); + ASSERT_TRUE(called_); + EXPECT_EQ(expected_, problems_); +} + +TEST_P(AddressValidatorTest, ValidAddressBR) { + // Skip this test case when using the OndemandSupplier, which depends on the + // address metadata server to map natural language names to metadata IDs. + if (GetParam() == &OndemandValidatorWrapper::Build) return; + + address_.region_code = "BR"; + address_.administrative_area = "S\xC3\xA3o Paulo"; /* São Paulo */ + address_.locality = "Presidente Prudente"; + address_.postal_code = "19063-008"; + address_.address_line.push_back("Rodovia Raposo Tavares, 6388-6682"); + address_.language_code = "pt"; + + ASSERT_NO_FATAL_FAILURE(Validate()); + ASSERT_TRUE(called_); + EXPECT_EQ(expected_, problems_); +} + +TEST_P(AddressValidatorTest, ValidAddressCA_en) { + // Skip this test case when using the OndemandSupplier, which depends on the + // address metadata server to map natural language names to metadata IDs. + if (GetParam() == &OndemandValidatorWrapper::Build) return; + + address_.region_code = "CA"; + address_.administrative_area = "New Brunswick"; + address_.locality = "Saint John County"; + address_.postal_code = "E2L 4Z6"; + address_.address_line.push_back("..."); + address_.language_code = "en"; + + ASSERT_NO_FATAL_FAILURE(Validate()); + ASSERT_TRUE(called_); + EXPECT_EQ(expected_, problems_); +} + +TEST_P(AddressValidatorTest, ValidAddressCA_fr) { + // Skip this test case when using the OndemandSupplier, which depends on the + // address metadata server to map natural language names to metadata IDs. + if (GetParam() == &OndemandValidatorWrapper::Build) return; + + address_.region_code = "CA"; + address_.administrative_area = "Nouveau-Brunswick"; + address_.locality = "Comt\xC3\xA9 de Saint-Jean"; /* Comté de Saint-Jean */ + address_.postal_code = "E2L 4Z6"; + address_.address_line.push_back("..."); + address_.language_code = "fr"; + + ASSERT_NO_FATAL_FAILURE(Validate()); + ASSERT_TRUE(called_); + EXPECT_EQ(expected_, problems_); +} + } // namespace diff --git a/cpp/test/localization_test.cc b/cpp/test/localization_test.cc index d1c4a9d..2ed4b07 100644 --- a/cpp/test/localization_test.cc +++ b/cpp/test/localization_test.cc @@ -54,12 +54,10 @@ class LocalizationTest : public testing::TestWithParam<int> { // Verifies that a custom message getter can be used. static const char kValidMessage[] = "Data"; -static const char kValidLanguageTag[] = "tlh"; std::string GetValidMessage(int message_id) { return kValidMessage; } TEST_P(LocalizationTest, ValidStringGetterCanBeUsed) { - localization_.SetGetter(&GetValidMessage, kValidLanguageTag); + localization_.SetGetter(&GetValidMessage); EXPECT_EQ(kValidMessage, localization_.GetString(GetParam())); - EXPECT_EQ(kValidLanguageTag, localization_.GetLanguage()); } // Verifies that the default language for messages does not have empty strings. @@ -78,13 +76,6 @@ TEST_P(LocalizationTest, NoDoubleSpace) { localization_.GetString(GetParam()).find(std::string(2U, ' '))); } -// Verifies that the default string is English. -TEST_P(LocalizationTest, DefaultStringIsEnglish) { - std::string default_string = localization_.GetString(GetParam()); - localization_.SetLanguage("en"); - EXPECT_EQ(default_string, localization_.GetString(GetParam())); -} - // Tests all message identifiers. INSTANTIATE_TEST_CASE_P( AllMessages, LocalizationTest, @@ -130,24 +121,17 @@ TEST_F(LocalizationTest, InvalidMessageIsEmptyString) { EXPECT_TRUE(localization_.GetString(INVALID_MESSAGE_ID).empty()); } -// Verifies that the default language is English. -TEST_F(LocalizationTest, DefaultLanguageIsEnglish) { - EXPECT_EQ("en", localization_.GetLanguage()); -} - TEST(LocalizationGetErrorMessageTest, MissingRequiredPostalCode) { Localization localization; AddressData address; address.region_code = "CH"; - EXPECT_EQ(std::string("You must provide a postal code, for example") + - " 2544,1211,1556,3030." + + EXPECT_EQ(std::string("You must provide a postal code, for example 2544.") + " Don't know your postal code? Find it out" + " <a href=\"http://www.post.ch/db/owa/pv_plz_pack/pr_main\">" + "here</a>.", localization.GetErrorMessage(address, POSTAL_CODE, MISSING_REQUIRED_FIELD, true, true)); - EXPECT_EQ(std::string("You must provide a postal code, for example") + - " 2544,1211,1556,3030.", + EXPECT_EQ("You must provide a postal code, for example 2544.", localization.GetErrorMessage(address, POSTAL_CODE, MISSING_REQUIRED_FIELD, true, false)); EXPECT_EQ("You can't leave this empty.", @@ -162,15 +146,13 @@ TEST(LocalizationGetErrorMessageTest, MissingRequiredZipCode) { Localization localization; AddressData address; address.region_code = "US"; - EXPECT_EQ(std::string("You must provide a ZIP code, for example") + - " 95014,22162-1010." + + EXPECT_EQ(std::string("You must provide a ZIP code, for example 95014.") + " Don't know your ZIP code? Find it out" + " <a href=\"https://tools.usps.com/go/ZipLookupAction!" + "input.action\">here</a>.", localization.GetErrorMessage(address, POSTAL_CODE, MISSING_REQUIRED_FIELD, true, true)); - EXPECT_EQ(std::string("You must provide a ZIP code, for example") + - " 95014,22162-1010.", + EXPECT_EQ("You must provide a ZIP code, for example 95014.", localization.GetErrorMessage(address, POSTAL_CODE, MISSING_REQUIRED_FIELD, true, false)); EXPECT_EQ("You can't leave this empty.", @@ -342,16 +324,14 @@ TEST(LocalizationGetErrorMessageTest, InvalidFormatPostalCode) { AddressData address; address.region_code = "CH"; EXPECT_EQ(std::string("This postal code format is not recognized. Example ") + - "of a valid postal code:" + - " 2544,1211,1556,3030." + + "of a valid postal code: 2544." + " Don't know your postal code? Find it out" + " <a href=\"http://www.post.ch/db/owa/pv_plz_pack/pr_main\">" + "here</a>.", localization.GetErrorMessage(address, POSTAL_CODE, INVALID_FORMAT, true, true)); EXPECT_EQ(std::string("This postal code format is not recognized. Example ") + - "of a valid postal code:" + - " 2544,1211,1556,3030.", + "of a valid postal code: 2544.", localization.GetErrorMessage(address, POSTAL_CODE, INVALID_FORMAT, true, false)); EXPECT_EQ("This postal code format is not recognized.", @@ -367,16 +347,14 @@ TEST(LocalizationGetErrorMessageTest, InvalidFormatZipCode) { AddressData address; address.region_code = "US"; EXPECT_EQ(std::string("This ZIP code format is not recognized. Example of ") + - "a valid ZIP code:" + - " 95014,22162-1010." + + "a valid ZIP code: 95014." + " Don't know your ZIP code? Find it out" + " <a href=\"https://tools.usps.com/go/ZipLookupAction!" + "input.action\">here</a>.", localization.GetErrorMessage(address, POSTAL_CODE, INVALID_FORMAT, true, true)); EXPECT_EQ(std::string("This ZIP code format is not recognized. Example of ") + - "a valid ZIP code:" + - " 95014,22162-1010.", + "a valid ZIP code: 95014.", localization.GetErrorMessage(address, POSTAL_CODE, INVALID_FORMAT, true, false)); EXPECT_EQ("This ZIP code format is not recognized.", diff --git a/cpp/test/util/string_compare_test.cc b/cpp/test/util/string_compare_test.cc index d5990d9..8f8d4d5 100644 --- a/cpp/test/util/string_compare_test.cc +++ b/cpp/test/util/string_compare_test.cc @@ -25,14 +25,19 @@ using i18n::addressinput::StringCompare; struct TestCase { TestCase(const std::string& left, const std::string& right, - bool should_be_equal) - : left(left), right(right), should_be_equal(should_be_equal) {} + bool should_be_equal, + bool should_be_less) + : left(left), + right(right), + should_be_equal(should_be_equal), + should_be_less(should_be_less) {} ~TestCase() {} std::string left; std::string right; bool should_be_equal; + bool should_be_less; }; class StringCompareTest : public testing::TestWithParam<TestCase> { @@ -48,13 +53,25 @@ TEST_P(StringCompareTest, CorrectComparison) { } } +TEST_P(StringCompareTest, CorrectLess) { + if (GetParam().should_be_less) { + EXPECT_TRUE(compare_.NaturalLess(GetParam().left, GetParam().right)); + } else { + EXPECT_FALSE(compare_.NaturalLess(GetParam().left, GetParam().right)); + } +} + INSTANTIATE_TEST_CASE_P( Comparisons, StringCompareTest, - testing::Values(TestCase("foo", "foo", true), - TestCase("foo", "FOO", true), - TestCase("bar", "foo", false), - TestCase("강원도", "강원도", true), - TestCase("강원도", "대구광역시", false), - TestCase("ZÜRICH", "zürich", true))); + testing::Values(TestCase("foo", "foo", true, false), + TestCase("foo", "FOO", true, false), + TestCase("bar", "foo", false, true), + TestCase("강원도", "강원도", true, false), + TestCase("강원도", "대구광역시", false, true), + TestCase("ZÜRICH", "zürich", true, false), + TestCase("абв", "где", false, true), + TestCase("абв", "ГДЕ", false, true), + TestCase("где", "абв", false, false), + TestCase("где", "АБВ", false, false))); } // namespace diff --git a/cpp/test/validation_task_test.cc b/cpp/test/validation_task_test.cc index e8136f3..011a978 100644 --- a/cpp/test/validation_task_test.cc +++ b/cpp/test/validation_task_test.cc @@ -98,14 +98,14 @@ class ValidationTaskTest : public testing::Test { &problems_, *validated_); - Supplier::RuleHierarchy* hierarchy = new Supplier::RuleHierarchy(); + Supplier::RuleHierarchy hierarchy; for (size_t i = 0; i < arraysize(json_) && json_[i] != NULL; ++i) { ASSERT_TRUE(rule[i].ParseSerializedRule(json_[i])); - hierarchy->rule[i] = &rule[i]; + hierarchy.rule[i] = &rule[i]; } - (*task->supplied_)(success_, *task->lookup_key_, *hierarchy); + (*task->supplied_)(success_, *task->lookup_key_, hierarchy); } const char* json_[arraysize(LookupKey::kHierarchy)]; |