From 0ddcd24a82f60578ab843feae5bea5545989cb96 Mon Sep 17 00:00:00 2001 From: "rouslan@chromium.org" Date: Tue, 29 Jul 2014 16:18:42 +0000 Subject: Improve efficiency of PreloadSupplier and RegionDataBuilder. before: RegionDataBuilderTest.BuildCnRegionTree (8485 ms) after: RegionDataBuilderTest.BuildCnRegionTree (5765 ms) => 32% improvement. (Built using 'g++ -g -pg -O0') R=roubert@google.com Review URL: https://codereview.appspot.com/111390043 git-svn-id: http://libaddressinput.googlecode.com/svn/trunk@322 38ededc0-08b8-5190-f2ac-b31f878777ad --- cpp/include/libaddressinput/preload_supplier.h | 7 +++ cpp/src/preload_supplier.cc | 71 ++++++++++++++------- cpp/src/region_data_builder.cc | 87 ++++++++++++++++++-------- cpp/src/util/json.cc | 76 ++++++---------------- cpp/src/util/json.h | 14 ++--- cpp/test/preload_supplier_test.cc | 8 +++ cpp/test/util/json_test.cc | 35 ++++++----- 7 files changed, 166 insertions(+), 132 deletions(-) diff --git a/cpp/include/libaddressinput/preload_supplier.h b/cpp/include/libaddressinput/preload_supplier.h index 740b4d0..a4025cb 100644 --- a/cpp/include/libaddressinput/preload_supplier.h +++ b/cpp/include/libaddressinput/preload_supplier.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -80,6 +81,11 @@ class PreloadSupplier : public Supplier { // Calls |loaded| when the loading has finished. void LoadRules(const std::string& region_code, const Callback& loaded); + // Returns a mapping of lookup keys to rules. Should be called only when + // IsLoaded() returns true for the |region_code|. + const std::map& GetRulesForRegion( + const std::string& region_code) const; + bool IsLoaded(const std::string& region_code) const; bool IsPending(const std::string& region_code) const; @@ -93,6 +99,7 @@ class PreloadSupplier : public Supplier { std::set pending_; const scoped_ptr rule_index_; std::vector rule_storage_; + std::map > region_rules_; DISALLOW_COPY_AND_ASSIGN(PreloadSupplier); }; diff --git a/cpp/src/preload_supplier.cc b/cpp/src/preload_supplier.cc index 26f7e2b..2557e33 100644 --- a/cpp/src/preload_supplier.cc +++ b/cpp/src/preload_supplier.cc @@ -72,16 +72,19 @@ class Helper { const Retriever& retriever, std::set* pending, IndexMap* rule_index, - std::vector* rule_storage) + std::vector* rule_storage, + std::map* region_rules) : region_code_(region_code), loaded_(loaded), pending_(pending), rule_index_(rule_index), rule_storage_(rule_storage), + region_rules_(region_rules), retrieved_(BuildCallback(this, &Helper::OnRetrieved)) { assert(pending_ != NULL); assert(rule_index_ != NULL); assert(rule_storage_ != NULL); + assert(region_rules_ != NULL); assert(retrieved_ != NULL); pending_->insert(key); retriever.Retrieve(key, *retrieved_); @@ -103,6 +106,14 @@ class Helper { std::string id; std::vector sub_rules; + IndexMap::iterator last_index_it = rule_index_->end(); + IndexMap::iterator last_latin_it = rule_index_->end(); + std::map::iterator last_region_it = + region_rules_->end(); + + IndexMap::const_iterator hints[arraysize(LookupKey::kHierarchy) - 1]; + std::fill(hints, hints + arraysize(hints), rule_index_->end()); + if (!success) { goto callback; } @@ -112,19 +123,17 @@ class Helper { goto callback; } - for (std::vector::const_iterator - it = json.GetKeys().begin(); it != json.GetKeys().end(); ++it) { - const Json* value; - if (!json.GetDictionaryValueForKey(*it, &value)) { - success = false; - goto callback; - } - + for (std::vector::const_iterator + it = json.GetSubDictionaries().begin(); + it != json.GetSubDictionaries().end(); + ++it) { + const Json* value = *it; + assert(value != NULL); if (!value->GetStringValueForKey("id", &id)) { success = false; goto callback; } - assert(*it == id); // Sanity check. + assert(!id.empty()); size_t depth = std::count(id.begin(), id.end(), '/') - 1; assert(depth < arraysize(LookupKey::kHierarchy)); @@ -143,11 +152,15 @@ class Helper { sub_rules.push_back(rule); } - // Add the ID of this Rule object to the rule index. - std::pair result = - rule_index_->insert(std::make_pair(id, rule)); - assert(result.second); - (void)result; // Prevent unused variable if assert() is optimized away. + // Add the ID of this Rule object to the rule index with natural string + // comparison for keys. + last_index_it = + rule_index_->insert(last_index_it, std::make_pair(id, rule)); + + // Add the ID of this Rule object to the region-specific rule index with + // exact string comparison for keys. + last_region_it = + region_rules_->insert(last_region_it, std::make_pair(id, rule)); ++rule_count; } @@ -179,9 +192,12 @@ class Helper { } parent_id.resize(pos); - IndexMap::const_iterator jt = rule_index_->find(parent_id); - assert(jt != rule_index_->end()); - hierarchy.push(jt->second); + IndexMap::const_iterator* const hint = &hints[hierarchy.size() - 1]; + if (*hint == rule_index_->end() || (*hint)->first != parent_id) { + *hint = rule_index_->find(parent_id); + } + assert(*hint != rule_index_->end()); + hierarchy.push((*hint)->second); } std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1)); @@ -217,13 +233,15 @@ class Helper { } } - rule_index_->insert(std::make_pair(human_id, *it)); + last_index_it = + rule_index_->insert(last_index_it, std::make_pair(human_id, *it)); // Add the Latin script ID, if a Latin script name could be found for // every part of the ID. if (std::count(human_id.begin(), human_id.end(), '/') == std::count(latin_id.begin(), latin_id.end(), '/')) { - rule_index_->insert(std::make_pair(latin_id, *it)); + last_latin_it = + rule_index_->insert(last_latin_it, std::make_pair(latin_id, *it)); } } @@ -237,6 +255,7 @@ class Helper { std::set* const pending_; IndexMap* const rule_index_; std::vector* const rule_storage_; + std::map* const region_rules_; const scoped_ptr retrieved_; DISALLOW_COPY_AND_ASSIGN(Helper); @@ -258,7 +277,8 @@ PreloadSupplier::PreloadSupplier(const std::string& validation_data_url, : retriever_(new Retriever(validation_data_url, downloader, storage)), pending_(), rule_index_(new IndexMap), - rule_storage_() {} + rule_storage_(), + region_rules_() {} PreloadSupplier::~PreloadSupplier() { for (std::vector::const_iterator @@ -303,7 +323,14 @@ void PreloadSupplier::LoadRules(const std::string& region_code, *retriever_, &pending_, rule_index_.get(), - &rule_storage_); + &rule_storage_, + ®ion_rules_[region_code]); +} + +const std::map& PreloadSupplier::GetRulesForRegion( + const std::string& region_code) const { + assert(IsLoaded(region_code)); + return region_rules_.find(region_code)->second; } bool PreloadSupplier::IsLoaded(const std::string& region_code) const { diff --git a/cpp/src/region_data_builder.cc b/cpp/src/region_data_builder.cc index cf49c71..b600d09 100644 --- a/cpp/src/region_data_builder.cc +++ b/cpp/src/region_data_builder.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -34,59 +35,91 @@ namespace addressinput { namespace { -// Does not take ownership of |supplier| or |parent_region|, neither of which is -// allowed to be NULL. -void BuildRegionTreeRecursively(PreloadSupplier* supplier, - const LookupKey& parent_key, - RegionData* parent_region, - const std::vector& keys, - bool prefer_latin_name) { - assert(supplier != NULL); +// The maximum depth of lookup keys. +static const size_t kLookupKeysMaxDepth = arraysize(LookupKey::kHierarchy) - 1; + +// Does not take ownership of |parent_region|, which is not allowed to be NULL. +void BuildRegionTreeRecursively( + const std::map& rules, + std::map::const_iterator hint, + const LookupKey& parent_key, + RegionData* parent_region, + const std::vector& keys, + bool prefer_latin_name, + size_t region_max_depth) { assert(parent_region != NULL); LookupKey lookup_key; for (std::vector::const_iterator key_it = keys.begin(); key_it != keys.end(); ++key_it) { lookup_key.FromLookupKey(parent_key, *key_it); - const Rule* rule = supplier->GetRule(lookup_key); - if (rule == NULL) { - return; + const std::string& lookup_key_string = + lookup_key.ToKeyString(kLookupKeysMaxDepth); + + ++hint; + if (hint == rules.end() || hint->first != lookup_key_string) { + hint = rules.find(lookup_key_string); + if (hint == rules.end()) { + return; + } } + + const Rule* rule = hint->second; + assert(rule != NULL); + const std::string& local_name = rule->GetName().empty() ? *key_it : rule->GetName(); const std::string& name = prefer_latin_name && !rule->GetLatinName().empty() ? rule->GetLatinName() : local_name; RegionData* region = parent_region->AddSubRegion(*key_it, name); - if (!rule->GetSubKeys().empty()) { - BuildRegionTreeRecursively( - supplier, lookup_key, region, rule->GetSubKeys(), prefer_latin_name); + + if (!rule->GetSubKeys().empty() && + region_max_depth > parent_key.GetDepth()) { + BuildRegionTreeRecursively(rules, + hint, + lookup_key, + region, + rule->GetSubKeys(), + prefer_latin_name, + region_max_depth); } } } -// Does not take ownership of |supplier|, which cannot be NULL. The caller owns -// the result. -RegionData* BuildRegion(PreloadSupplier* supplier, +// The caller owns the result. +RegionData* BuildRegion(const std::map& rules, const std::string& region_code, const Language& language) { - assert(supplier != NULL); - AddressData address; address.region_code = region_code; LookupKey lookup_key; lookup_key.FromAddress(address); - const Rule* const rule = supplier->GetRule(lookup_key); + std::map::const_iterator hint = + rules.find(lookup_key.ToKeyString(kLookupKeysMaxDepth)); + assert(hint != rules.end()); + + const Rule* rule = hint->second; assert(rule != NULL); RegionData* region = new RegionData(region_code); - BuildRegionTreeRecursively(supplier, - lookup_key, - region, - rule->GetSubKeys(), - language.has_latin_script); + + // If there're sub-keys for field X, but field X is not used in this region + // code, then these sub-keys are skipped over. For example, CH has sub-keys + // for field ADMIN_AREA, but CH does not use ADMIN_AREA field. + size_t region_max_depth = + RegionDataConstants::GetMaxLookupKeyDepth(region_code); + if (region_max_depth > 0) { + BuildRegionTreeRecursively(rules, + hint, + lookup_key, + region, + rule->GetSubKeys(), + language.has_latin_script, + region_max_depth); + } return region; } @@ -139,9 +172,11 @@ const RegionData& RegionDataBuilder::Build( LanguageRegionMap::const_iterator language_it = region_it->second->find(best_language.tag); if (language_it == region_it->second->end()) { + const std::map& rules = + supplier_->GetRulesForRegion(region_code); language_it = region_it->second->insert(std::make_pair(best_language.tag, - BuildRegion(supplier_, + BuildRegion(rules, region_code, best_language))) .first; diff --git a/cpp/src/util/json.cc b/cpp/src/util/json.cc index ef3056b..730479c 100644 --- a/cpp/src/util/json.cc +++ b/cpp/src/util/json.cc @@ -19,9 +19,7 @@ #include #include -#include #include -#include #include #include @@ -40,27 +38,31 @@ class Json::JsonImpl { : document_(new Document), value_(document_.get()), dictionaries_(), - keys_(), valid_(false) { document_->Parse(json.c_str()); valid_ = !document_->HasParseError() && document_->IsObject(); - if (valid_) { - BuildKeyList(); - } } ~JsonImpl() { - for (std::map::const_iterator - it = dictionaries_.begin(); - it != dictionaries_.end(); - ++it) { - delete it->second; + for (std::vector::const_iterator it = dictionaries_.begin(); + it != dictionaries_.end(); ++it) { + delete *it; } } bool valid() const { return valid_; } - const std::vector& GetKeys() const { return keys_; } + const std::vector& GetSubDictionaries() { + if (dictionaries_.empty()) { + for (Value::ConstMemberIterator member = value_->MemberBegin(); + member != value_->MemberEnd(); ++member) { + if (member->value.IsObject()) { + dictionaries_.push_back(new Json(new JsonImpl(&member->value))); + } + } + } + return dictionaries_; + } bool GetStringValueForKey(const std::string& key, std::string* value) const { assert(value != NULL); @@ -75,48 +77,15 @@ class Json::JsonImpl { return true; } - bool GetDictionaryValueForKey(const std::string& key, const Json** value) { - assert(value != NULL); - - std::map::const_iterator dict_it = - dictionaries_.find(key); - if (dict_it != dictionaries_.end()) { - *value = dict_it->second; - return true; - } - - Value::ConstMemberIterator member = value_->FindMember(key.c_str()); - if (member == NULL || !member->value.IsObject()) { - return false; - } - - std::pair::iterator, bool> result = - dictionaries_.insert( - std::make_pair(key, new Json(new JsonImpl(&member->value)))); - assert(result.second); - *value = result.first->second; - return true; - } - private: // Does not take ownership of |value|. explicit JsonImpl(const Value* value) : document_(), value_(value), dictionaries_(), - keys_(), valid_(true) { assert(value_ != NULL); assert(value_->IsObject()); - BuildKeyList(); - } - - void BuildKeyList() { - assert(keys_.empty()); - for (Value::ConstMemberIterator member = value_->MemberBegin(); - member != value_->MemberEnd(); ++member) { - keys_.push_back(member->name.GetString()); - } } // An owned JSON document. Can be NULL if the JSON document is not owned. @@ -125,11 +94,8 @@ class Json::JsonImpl { // A JSON document that is not owned. Cannot be NULL. Can point to document_. const Value* const value_; - // Owned JSON objects. - std::map dictionaries_; - - // The list of keys with values in the JSON object. - std::vector keys_; + // Owned JSON objects of sub-dictionaries. + std::vector dictionaries_; // True if the JSON object was parsed successfully. bool valid_; @@ -150,9 +116,9 @@ bool Json::ParseObject(const std::string& json) { return impl_ != NULL; } -const std::vector& Json::GetKeys() const { +const std::vector& Json::GetSubDictionaries() const { assert(impl_ != NULL); - return impl_->GetKeys(); + return impl_->GetSubDictionaries(); } bool Json::GetStringValueForKey(const std::string& key, @@ -161,12 +127,6 @@ bool Json::GetStringValueForKey(const std::string& key, return impl_->GetStringValueForKey(key, value); } -bool Json::GetDictionaryValueForKey(const std::string& key, - const Json** value) const { - assert(impl_ != NULL); - return impl_->GetDictionaryValueForKey(key, value); -} - Json::Json(JsonImpl* impl) : impl_(impl) {} } // namespace addressinput diff --git a/cpp/src/util/json.h b/cpp/src/util/json.h index 42cd331..1aac803 100644 --- a/cpp/src/util/json.h +++ b/cpp/src/util/json.h @@ -39,9 +39,10 @@ class Json { // object. bool ParseObject(const std::string& json); - // Returns the list of keys in the parsed JSON. The JSON object must be parsed - // successfully in ParseObject() before invoking this method. - const std::vector& GetKeys() const; + // Returns the list of sub dictionaries. The JSON object must be parsed + // successfully in ParseObject() before invoking this method. The caller does + // not own the result. + const std::vector& GetSubDictionaries() const; // Returns true if the parsed JSON contains a string value for |key|. Sets // |value| to the string value of the |key|. The JSON object must be parsed @@ -49,13 +50,6 @@ class Json { // parameter should not be NULL. bool GetStringValueForKey(const std::string& key, std::string* value) const; - // Returns true if the parsed JSON contains a dictionary value for |key|. - // Points |value| to the dictionary value of the |key|. The JSON object must - // be parsed successfully in ParseObject() before invoking this method. The - // caller does not own |value|. - bool GetDictionaryValueForKey(const std::string& key, - const Json** value) const; - private: class JsonImpl; friend class JsonImpl; diff --git a/cpp/test/preload_supplier_test.cc b/cpp/test/preload_supplier_test.cc index f8e9e23..7181ef2 100644 --- a/cpp/test/preload_supplier_test.cc +++ b/cpp/test/preload_supplier_test.cc @@ -121,4 +121,12 @@ TEST_F(PreloadSupplierTest, GetTooPreciseRule) { EXPECT_TRUE(rule == NULL); } +TEST_F(PreloadSupplierTest, GetRulesForRegion) { + supplier_.LoadRules("CN", *loaded_callback_); + const std::map& rules = + supplier_.GetRulesForRegion("CN"); + EXPECT_TRUE(rules.find("data/CN") != rules.end()); + EXPECT_LT(1, rules.size()); +} + } // namespace diff --git a/cpp/test/util/json_test.cc b/cpp/test/util/json_test.cc index 2306ac3..594c341 100644 --- a/cpp/test/util/json_test.cc +++ b/cpp/test/util/json_test.cc @@ -120,32 +120,35 @@ TEST(JsonTest, NumberIsNotValid) { TEST(JsonTest, NoDictionaryFound) { Json json; ASSERT_TRUE(json.ParseObject("{\"key\":\"value\"}")); - const Json* sub_json; - EXPECT_FALSE(json.GetDictionaryValueForKey("key", &sub_json)); + EXPECT_TRUE(json.GetSubDictionaries().empty()); } TEST(JsonTest, DictionaryFound) { Json json; ASSERT_TRUE(json.ParseObject("{\"key\":{\"inner_key\":\"value\"}}")); - const Json* sub_json = NULL; - ASSERT_TRUE(json.GetDictionaryValueForKey("key", &sub_json)); - ASSERT_TRUE(sub_json != NULL); + const std::vector& sub_dicts = json.GetSubDictionaries(); + ASSERT_EQ(1U, sub_dicts.size()); + std::string value; - EXPECT_TRUE(sub_json->GetStringValueForKey("inner_key", &value)); + EXPECT_TRUE(sub_dicts.front()->GetStringValueForKey("inner_key", &value)); EXPECT_EQ("value", value); } -TEST(JsonTest, DictionariesHaveKeys) { +TEST(JsonTest, DictionariesHaveSubDictionaries) { Json json; - ASSERT_TRUE(json.ParseObject("{\"key\":{\"inner_key\":\"value\"}}")); - std::vector expected_keys(1, "key"); - EXPECT_EQ(expected_keys, json.GetKeys()); - - const Json* sub_json = NULL; - ASSERT_TRUE(json.GetDictionaryValueForKey("key", &sub_json)); - ASSERT_TRUE(sub_json != NULL); - std::vector expected_sub_keys(1, "inner_key"); - EXPECT_EQ(expected_sub_keys, sub_json->GetKeys()); + ASSERT_TRUE(json.ParseObject( + "{\"key\":{\"inner_key\":{\"inner_inner_key\":\"value\"}}}")); + const std::vector& sub_dicts = json.GetSubDictionaries(); + ASSERT_EQ(1U, sub_dicts.size()); + + const std::vector& sub_sub_dicts = + sub_dicts.front()->GetSubDictionaries(); + ASSERT_EQ(1U, sub_sub_dicts.size()); + + std::string value; + EXPECT_TRUE( + sub_sub_dicts.front()->GetStringValueForKey("inner_inner_key", &value)); + EXPECT_EQ("value", value); } } // namespace -- cgit v1.2.3