aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrouslan@chromium.org <rouslan@chromium.org@38ededc0-08b8-5190-f2ac-b31f878777ad>2014-07-29 16:18:42 +0000
committerrouslan@chromium.org <rouslan@chromium.org@38ededc0-08b8-5190-f2ac-b31f878777ad>2014-07-29 16:18:42 +0000
commit0ddcd24a82f60578ab843feae5bea5545989cb96 (patch)
treed0ce3af5e9864c80fdbb99523e4f8d0e6d641eb1
parenta8cfad33049a40b2661d5a8dfd218b9de43f136b (diff)
downloadsrc-0ddcd24a82f60578ab843feae5bea5545989cb96.tar.gz
Improve efficiency of PreloadSupplier and RegionDataBuilder.
before: RegionDataBuilderTest.BuildCnRegionTree (8485 ms) after: RegionDataBuilderTest.BuildCnRegionTree (5765 ms) => 32% improvement. (Built using 'g++ -g -pg -O0') R=roubert@google.com Review URL: https://codereview.appspot.com/111390043 git-svn-id: http://libaddressinput.googlecode.com/svn/trunk@322 38ededc0-08b8-5190-f2ac-b31f878777ad
-rw-r--r--cpp/include/libaddressinput/preload_supplier.h7
-rw-r--r--cpp/src/preload_supplier.cc71
-rw-r--r--cpp/src/region_data_builder.cc87
-rw-r--r--cpp/src/util/json.cc76
-rw-r--r--cpp/src/util/json.h14
-rw-r--r--cpp/test/preload_supplier_test.cc8
-rw-r--r--cpp/test/util/json_test.cc35
7 files changed, 166 insertions, 132 deletions
diff --git a/cpp/include/libaddressinput/preload_supplier.h b/cpp/include/libaddressinput/preload_supplier.h
index 740b4d0..a4025cb 100644
--- a/cpp/include/libaddressinput/preload_supplier.h
+++ b/cpp/include/libaddressinput/preload_supplier.h
@@ -20,6 +20,7 @@
#include <libaddressinput/util/basictypes.h>
#include <libaddressinput/util/scoped_ptr.h>
+#include <map>
#include <set>
#include <string>
#include <vector>
@@ -80,6 +81,11 @@ class PreloadSupplier : public Supplier {
// Calls |loaded| when the loading has finished.
void LoadRules(const std::string& region_code, const Callback& loaded);
+ // Returns a mapping of lookup keys to rules. Should be called only when
+ // IsLoaded() returns true for the |region_code|.
+ const std::map<std::string, const Rule*>& GetRulesForRegion(
+ const std::string& region_code) const;
+
bool IsLoaded(const std::string& region_code) const;
bool IsPending(const std::string& region_code) const;
@@ -93,6 +99,7 @@ class PreloadSupplier : public Supplier {
std::set<std::string> pending_;
const scoped_ptr<IndexMap> rule_index_;
std::vector<const Rule*> rule_storage_;
+ std::map<std::string, std::map<std::string, const Rule*> > region_rules_;
DISALLOW_COPY_AND_ASSIGN(PreloadSupplier);
};
diff --git a/cpp/src/preload_supplier.cc b/cpp/src/preload_supplier.cc
index 26f7e2b..2557e33 100644
--- a/cpp/src/preload_supplier.cc
+++ b/cpp/src/preload_supplier.cc
@@ -72,16 +72,19 @@ class Helper {
const Retriever& retriever,
std::set<std::string>* pending,
IndexMap* rule_index,
- std::vector<const Rule*>* rule_storage)
+ std::vector<const Rule*>* rule_storage,
+ std::map<std::string, const Rule*>* region_rules)
: region_code_(region_code),
loaded_(loaded),
pending_(pending),
rule_index_(rule_index),
rule_storage_(rule_storage),
+ region_rules_(region_rules),
retrieved_(BuildCallback(this, &Helper::OnRetrieved)) {
assert(pending_ != NULL);
assert(rule_index_ != NULL);
assert(rule_storage_ != NULL);
+ assert(region_rules_ != NULL);
assert(retrieved_ != NULL);
pending_->insert(key);
retriever.Retrieve(key, *retrieved_);
@@ -103,6 +106,14 @@ class Helper {
std::string id;
std::vector<const Rule*> sub_rules;
+ IndexMap::iterator last_index_it = rule_index_->end();
+ IndexMap::iterator last_latin_it = rule_index_->end();
+ std::map<std::string, const Rule*>::iterator last_region_it =
+ region_rules_->end();
+
+ IndexMap::const_iterator hints[arraysize(LookupKey::kHierarchy) - 1];
+ std::fill(hints, hints + arraysize(hints), rule_index_->end());
+
if (!success) {
goto callback;
}
@@ -112,19 +123,17 @@ class Helper {
goto callback;
}
- for (std::vector<std::string>::const_iterator
- it = json.GetKeys().begin(); it != json.GetKeys().end(); ++it) {
- const Json* value;
- if (!json.GetDictionaryValueForKey(*it, &value)) {
- success = false;
- goto callback;
- }
-
+ for (std::vector<const Json*>::const_iterator
+ it = json.GetSubDictionaries().begin();
+ it != json.GetSubDictionaries().end();
+ ++it) {
+ const Json* value = *it;
+ assert(value != NULL);
if (!value->GetStringValueForKey("id", &id)) {
success = false;
goto callback;
}
- assert(*it == id); // Sanity check.
+ assert(!id.empty());
size_t depth = std::count(id.begin(), id.end(), '/') - 1;
assert(depth < arraysize(LookupKey::kHierarchy));
@@ -143,11 +152,15 @@ class Helper {
sub_rules.push_back(rule);
}
- // Add the ID of this Rule object to the rule index.
- std::pair<IndexMap::iterator, bool> result =
- rule_index_->insert(std::make_pair(id, rule));
- assert(result.second);
- (void)result; // Prevent unused variable if assert() is optimized away.
+ // Add the ID of this Rule object to the rule index with natural string
+ // comparison for keys.
+ last_index_it =
+ rule_index_->insert(last_index_it, std::make_pair(id, rule));
+
+ // Add the ID of this Rule object to the region-specific rule index with
+ // exact string comparison for keys.
+ last_region_it =
+ region_rules_->insert(last_region_it, std::make_pair(id, rule));
++rule_count;
}
@@ -179,9 +192,12 @@ class Helper {
}
parent_id.resize(pos);
- IndexMap::const_iterator jt = rule_index_->find(parent_id);
- assert(jt != rule_index_->end());
- hierarchy.push(jt->second);
+ IndexMap::const_iterator* const hint = &hints[hierarchy.size() - 1];
+ if (*hint == rule_index_->end() || (*hint)->first != parent_id) {
+ *hint = rule_index_->find(parent_id);
+ }
+ assert(*hint != rule_index_->end());
+ hierarchy.push((*hint)->second);
}
std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1));
@@ -217,13 +233,15 @@ class Helper {
}
}
- rule_index_->insert(std::make_pair(human_id, *it));
+ last_index_it =
+ rule_index_->insert(last_index_it, std::make_pair(human_id, *it));
// Add the Latin script ID, if a Latin script name could be found for
// every part of the ID.
if (std::count(human_id.begin(), human_id.end(), '/') ==
std::count(latin_id.begin(), latin_id.end(), '/')) {
- rule_index_->insert(std::make_pair(latin_id, *it));
+ last_latin_it =
+ rule_index_->insert(last_latin_it, std::make_pair(latin_id, *it));
}
}
@@ -237,6 +255,7 @@ class Helper {
std::set<std::string>* const pending_;
IndexMap* const rule_index_;
std::vector<const Rule*>* const rule_storage_;
+ std::map<std::string, const Rule*>* const region_rules_;
const scoped_ptr<const Retriever::Callback> retrieved_;
DISALLOW_COPY_AND_ASSIGN(Helper);
@@ -258,7 +277,8 @@ PreloadSupplier::PreloadSupplier(const std::string& validation_data_url,
: retriever_(new Retriever(validation_data_url, downloader, storage)),
pending_(),
rule_index_(new IndexMap),
- rule_storage_() {}
+ rule_storage_(),
+ region_rules_() {}
PreloadSupplier::~PreloadSupplier() {
for (std::vector<const Rule*>::const_iterator
@@ -303,7 +323,14 @@ void PreloadSupplier::LoadRules(const std::string& region_code,
*retriever_,
&pending_,
rule_index_.get(),
- &rule_storage_);
+ &rule_storage_,
+ &region_rules_[region_code]);
+}
+
+const std::map<std::string, const Rule*>& PreloadSupplier::GetRulesForRegion(
+ const std::string& region_code) const {
+ assert(IsLoaded(region_code));
+ return region_rules_.find(region_code)->second;
}
bool PreloadSupplier::IsLoaded(const std::string& region_code) const {
diff --git a/cpp/src/region_data_builder.cc b/cpp/src/region_data_builder.cc
index cf49c71..b600d09 100644
--- a/cpp/src/region_data_builder.cc
+++ b/cpp/src/region_data_builder.cc
@@ -17,6 +17,7 @@
#include <libaddressinput/address_data.h>
#include <libaddressinput/preload_supplier.h>
#include <libaddressinput/region_data.h>
+#include <libaddressinput/util/basictypes.h>
#include <cassert>
#include <cstddef>
@@ -34,59 +35,91 @@ namespace addressinput {
namespace {
-// Does not take ownership of |supplier| or |parent_region|, neither of which is
-// allowed to be NULL.
-void BuildRegionTreeRecursively(PreloadSupplier* supplier,
- const LookupKey& parent_key,
- RegionData* parent_region,
- const std::vector<std::string>& keys,
- bool prefer_latin_name) {
- assert(supplier != NULL);
+// The maximum depth of lookup keys.
+static const size_t kLookupKeysMaxDepth = arraysize(LookupKey::kHierarchy) - 1;
+
+// Does not take ownership of |parent_region|, which is not allowed to be NULL.
+void BuildRegionTreeRecursively(
+ const std::map<std::string, const Rule*>& rules,
+ std::map<std::string, const Rule*>::const_iterator hint,
+ const LookupKey& parent_key,
+ RegionData* parent_region,
+ const std::vector<std::string>& keys,
+ bool prefer_latin_name,
+ size_t region_max_depth) {
assert(parent_region != NULL);
LookupKey lookup_key;
for (std::vector<std::string>::const_iterator key_it = keys.begin();
key_it != keys.end(); ++key_it) {
lookup_key.FromLookupKey(parent_key, *key_it);
- const Rule* rule = supplier->GetRule(lookup_key);
- if (rule == NULL) {
- return;
+ const std::string& lookup_key_string =
+ lookup_key.ToKeyString(kLookupKeysMaxDepth);
+
+ ++hint;
+ if (hint == rules.end() || hint->first != lookup_key_string) {
+ hint = rules.find(lookup_key_string);
+ if (hint == rules.end()) {
+ return;
+ }
}
+
+ const Rule* rule = hint->second;
+ assert(rule != NULL);
+
const std::string& local_name = rule->GetName().empty()
? *key_it : rule->GetName();
const std::string& name =
prefer_latin_name && !rule->GetLatinName().empty()
? rule->GetLatinName() : local_name;
RegionData* region = parent_region->AddSubRegion(*key_it, name);
- if (!rule->GetSubKeys().empty()) {
- BuildRegionTreeRecursively(
- supplier, lookup_key, region, rule->GetSubKeys(), prefer_latin_name);
+
+ if (!rule->GetSubKeys().empty() &&
+ region_max_depth > parent_key.GetDepth()) {
+ BuildRegionTreeRecursively(rules,
+ hint,
+ lookup_key,
+ region,
+ rule->GetSubKeys(),
+ prefer_latin_name,
+ region_max_depth);
}
}
}
-// Does not take ownership of |supplier|, which cannot be NULL. The caller owns
-// the result.
-RegionData* BuildRegion(PreloadSupplier* supplier,
+// The caller owns the result.
+RegionData* BuildRegion(const std::map<std::string, const Rule*>& rules,
const std::string& region_code,
const Language& language) {
- assert(supplier != NULL);
-
AddressData address;
address.region_code = region_code;
LookupKey lookup_key;
lookup_key.FromAddress(address);
- const Rule* const rule = supplier->GetRule(lookup_key);
+ std::map<std::string, const Rule*>::const_iterator hint =
+ rules.find(lookup_key.ToKeyString(kLookupKeysMaxDepth));
+ assert(hint != rules.end());
+
+ const Rule* rule = hint->second;
assert(rule != NULL);
RegionData* region = new RegionData(region_code);
- BuildRegionTreeRecursively(supplier,
- lookup_key,
- region,
- rule->GetSubKeys(),
- language.has_latin_script);
+
+ // If there're sub-keys for field X, but field X is not used in this region
+ // code, then these sub-keys are skipped over. For example, CH has sub-keys
+ // for field ADMIN_AREA, but CH does not use ADMIN_AREA field.
+ size_t region_max_depth =
+ RegionDataConstants::GetMaxLookupKeyDepth(region_code);
+ if (region_max_depth > 0) {
+ BuildRegionTreeRecursively(rules,
+ hint,
+ lookup_key,
+ region,
+ rule->GetSubKeys(),
+ language.has_latin_script,
+ region_max_depth);
+ }
return region;
}
@@ -139,9 +172,11 @@ const RegionData& RegionDataBuilder::Build(
LanguageRegionMap::const_iterator language_it =
region_it->second->find(best_language.tag);
if (language_it == region_it->second->end()) {
+ const std::map<std::string, const Rule*>& rules =
+ supplier_->GetRulesForRegion(region_code);
language_it =
region_it->second->insert(std::make_pair(best_language.tag,
- BuildRegion(supplier_,
+ BuildRegion(rules,
region_code,
best_language)))
.first;
diff --git a/cpp/src/util/json.cc b/cpp/src/util/json.cc
index ef3056b..730479c 100644
--- a/cpp/src/util/json.cc
+++ b/cpp/src/util/json.cc
@@ -19,9 +19,7 @@
#include <cassert>
#include <cstddef>
-#include <map>
#include <string>
-#include <utility>
#include <vector>
#include <rapidjson/document.h>
@@ -40,27 +38,31 @@ class Json::JsonImpl {
: document_(new Document),
value_(document_.get()),
dictionaries_(),
- keys_(),
valid_(false) {
document_->Parse<kParseValidateEncodingFlag>(json.c_str());
valid_ = !document_->HasParseError() && document_->IsObject();
- if (valid_) {
- BuildKeyList();
- }
}
~JsonImpl() {
- for (std::map<std::string, const Json*>::const_iterator
- it = dictionaries_.begin();
- it != dictionaries_.end();
- ++it) {
- delete it->second;
+ for (std::vector<const Json*>::const_iterator it = dictionaries_.begin();
+ it != dictionaries_.end(); ++it) {
+ delete *it;
}
}
bool valid() const { return valid_; }
- const std::vector<std::string>& GetKeys() const { return keys_; }
+ const std::vector<const Json*>& GetSubDictionaries() {
+ if (dictionaries_.empty()) {
+ for (Value::ConstMemberIterator member = value_->MemberBegin();
+ member != value_->MemberEnd(); ++member) {
+ if (member->value.IsObject()) {
+ dictionaries_.push_back(new Json(new JsonImpl(&member->value)));
+ }
+ }
+ }
+ return dictionaries_;
+ }
bool GetStringValueForKey(const std::string& key, std::string* value) const {
assert(value != NULL);
@@ -75,48 +77,15 @@ class Json::JsonImpl {
return true;
}
- bool GetDictionaryValueForKey(const std::string& key, const Json** value) {
- assert(value != NULL);
-
- std::map<std::string, const Json*>::const_iterator dict_it =
- dictionaries_.find(key);
- if (dict_it != dictionaries_.end()) {
- *value = dict_it->second;
- return true;
- }
-
- Value::ConstMemberIterator member = value_->FindMember(key.c_str());
- if (member == NULL || !member->value.IsObject()) {
- return false;
- }
-
- std::pair<std::map<std::string, const Json*>::iterator, bool> result =
- dictionaries_.insert(
- std::make_pair(key, new Json(new JsonImpl(&member->value))));
- assert(result.second);
- *value = result.first->second;
- return true;
- }
-
private:
// Does not take ownership of |value|.
explicit JsonImpl(const Value* value)
: document_(),
value_(value),
dictionaries_(),
- keys_(),
valid_(true) {
assert(value_ != NULL);
assert(value_->IsObject());
- BuildKeyList();
- }
-
- void BuildKeyList() {
- assert(keys_.empty());
- for (Value::ConstMemberIterator member = value_->MemberBegin();
- member != value_->MemberEnd(); ++member) {
- keys_.push_back(member->name.GetString());
- }
}
// An owned JSON document. Can be NULL if the JSON document is not owned.
@@ -125,11 +94,8 @@ class Json::JsonImpl {
// A JSON document that is not owned. Cannot be NULL. Can point to document_.
const Value* const value_;
- // Owned JSON objects.
- std::map<std::string, const Json*> dictionaries_;
-
- // The list of keys with values in the JSON object.
- std::vector<std::string> keys_;
+ // Owned JSON objects of sub-dictionaries.
+ std::vector<const Json*> dictionaries_;
// True if the JSON object was parsed successfully.
bool valid_;
@@ -150,9 +116,9 @@ bool Json::ParseObject(const std::string& json) {
return impl_ != NULL;
}
-const std::vector<std::string>& Json::GetKeys() const {
+const std::vector<const Json*>& Json::GetSubDictionaries() const {
assert(impl_ != NULL);
- return impl_->GetKeys();
+ return impl_->GetSubDictionaries();
}
bool Json::GetStringValueForKey(const std::string& key,
@@ -161,12 +127,6 @@ bool Json::GetStringValueForKey(const std::string& key,
return impl_->GetStringValueForKey(key, value);
}
-bool Json::GetDictionaryValueForKey(const std::string& key,
- const Json** value) const {
- assert(impl_ != NULL);
- return impl_->GetDictionaryValueForKey(key, value);
-}
-
Json::Json(JsonImpl* impl) : impl_(impl) {}
} // namespace addressinput
diff --git a/cpp/src/util/json.h b/cpp/src/util/json.h
index 42cd331..1aac803 100644
--- a/cpp/src/util/json.h
+++ b/cpp/src/util/json.h
@@ -39,9 +39,10 @@ class Json {
// object.
bool ParseObject(const std::string& json);
- // Returns the list of keys in the parsed JSON. The JSON object must be parsed
- // successfully in ParseObject() before invoking this method.
- const std::vector<std::string>& GetKeys() const;
+ // Returns the list of sub dictionaries. The JSON object must be parsed
+ // successfully in ParseObject() before invoking this method. The caller does
+ // not own the result.
+ const std::vector<const Json*>& GetSubDictionaries() const;
// Returns true if the parsed JSON contains a string value for |key|. Sets
// |value| to the string value of the |key|. The JSON object must be parsed
@@ -49,13 +50,6 @@ class Json {
// parameter should not be NULL.
bool GetStringValueForKey(const std::string& key, std::string* value) const;
- // Returns true if the parsed JSON contains a dictionary value for |key|.
- // Points |value| to the dictionary value of the |key|. The JSON object must
- // be parsed successfully in ParseObject() before invoking this method. The
- // caller does not own |value|.
- bool GetDictionaryValueForKey(const std::string& key,
- const Json** value) const;
-
private:
class JsonImpl;
friend class JsonImpl;
diff --git a/cpp/test/preload_supplier_test.cc b/cpp/test/preload_supplier_test.cc
index f8e9e23..7181ef2 100644
--- a/cpp/test/preload_supplier_test.cc
+++ b/cpp/test/preload_supplier_test.cc
@@ -121,4 +121,12 @@ TEST_F(PreloadSupplierTest, GetTooPreciseRule) {
EXPECT_TRUE(rule == NULL);
}
+TEST_F(PreloadSupplierTest, GetRulesForRegion) {
+ supplier_.LoadRules("CN", *loaded_callback_);
+ const std::map<std::string, const Rule*>& rules =
+ supplier_.GetRulesForRegion("CN");
+ EXPECT_TRUE(rules.find("data/CN") != rules.end());
+ EXPECT_LT(1, rules.size());
+}
+
} // namespace
diff --git a/cpp/test/util/json_test.cc b/cpp/test/util/json_test.cc
index 2306ac3..594c341 100644
--- a/cpp/test/util/json_test.cc
+++ b/cpp/test/util/json_test.cc
@@ -120,32 +120,35 @@ TEST(JsonTest, NumberIsNotValid) {
TEST(JsonTest, NoDictionaryFound) {
Json json;
ASSERT_TRUE(json.ParseObject("{\"key\":\"value\"}"));
- const Json* sub_json;
- EXPECT_FALSE(json.GetDictionaryValueForKey("key", &sub_json));
+ EXPECT_TRUE(json.GetSubDictionaries().empty());
}
TEST(JsonTest, DictionaryFound) {
Json json;
ASSERT_TRUE(json.ParseObject("{\"key\":{\"inner_key\":\"value\"}}"));
- const Json* sub_json = NULL;
- ASSERT_TRUE(json.GetDictionaryValueForKey("key", &sub_json));
- ASSERT_TRUE(sub_json != NULL);
+ const std::vector<const Json*>& sub_dicts = json.GetSubDictionaries();
+ ASSERT_EQ(1U, sub_dicts.size());
+
std::string value;
- EXPECT_TRUE(sub_json->GetStringValueForKey("inner_key", &value));
+ EXPECT_TRUE(sub_dicts.front()->GetStringValueForKey("inner_key", &value));
EXPECT_EQ("value", value);
}
-TEST(JsonTest, DictionariesHaveKeys) {
+TEST(JsonTest, DictionariesHaveSubDictionaries) {
Json json;
- ASSERT_TRUE(json.ParseObject("{\"key\":{\"inner_key\":\"value\"}}"));
- std::vector<std::string> expected_keys(1, "key");
- EXPECT_EQ(expected_keys, json.GetKeys());
-
- const Json* sub_json = NULL;
- ASSERT_TRUE(json.GetDictionaryValueForKey("key", &sub_json));
- ASSERT_TRUE(sub_json != NULL);
- std::vector<std::string> expected_sub_keys(1, "inner_key");
- EXPECT_EQ(expected_sub_keys, sub_json->GetKeys());
+ ASSERT_TRUE(json.ParseObject(
+ "{\"key\":{\"inner_key\":{\"inner_inner_key\":\"value\"}}}"));
+ const std::vector<const Json*>& sub_dicts = json.GetSubDictionaries();
+ ASSERT_EQ(1U, sub_dicts.size());
+
+ const std::vector<const Json*>& sub_sub_dicts =
+ sub_dicts.front()->GetSubDictionaries();
+ ASSERT_EQ(1U, sub_sub_dicts.size());
+
+ std::string value;
+ EXPECT_TRUE(
+ sub_sub_dicts.front()->GetStringValueForKey("inner_inner_key", &value));
+ EXPECT_EQ("value", value);
}
} // namespace