aboutsummaryrefslogtreecommitdiff
path: root/icing/legacy
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2022-05-20 20:39:53 +0000
committerTim Barron <tjbarron@google.com>2022-05-20 20:39:53 +0000
commit9ea2234cfe87ffeca5e632704c8cf3ddbb00609b (patch)
tree62f140d7fa7af039b91935470bc0b59e0d8ed61e /icing/legacy
parent21441c71652b1116c467e106e1b735a9bd90541d (diff)
downloadicing-9ea2234cfe87ffeca5e632704c8cf3ddbb00609b.tar.gz
Sync from upstream.
Descriptions: ====================================================================== Fix bug in schema store where a failure during RegenerateDerivedFiles would lead to a dangling pointer. ====================================================================== Add RAII class that will create and destroy file directories. ====================================================================== Convert MainIndexDebugInfoProto and LiteIndexDebugInfoProto to string ====================================================================== Make SchemaStore move assignable. ====================================================================== Rollback of "convert the string lexicon debug information to a protocol buffer" ====================================================================== Fix NPE caused by a remap failure. ====================================================================== Unify the name "priority" and "severity" in Icing logging ====================================================================== Avoiding string formatting in Icing logging when we should not log ====================================================================== Switch to use an enum with BASIC/DETAILED to control the verbosity of getDebugInfo ====================================================================== Remove the behavior in the Language Segmenter to filter out non-ascii+non-alphanumeric characters. ====================================================================== Fix the SetSchema bug when we override a schema with nested incompatible types ====================================================================== Wrap __android_log_write with __android_is_loggable ====================================================================== Enable removing expired page tokens to free cache space ====================================================================== Bug: 146903474 Bug: 193453081 Bug: 222349894 Bug: 229770338 Bug: 229778472 Bug: 230879098 Bug: 231416401 Bug: 231237897 Bug: 232273174 Change-Id: I22f050de16f56dce39e12a7033947519d598c840
Diffstat (limited to 'icing/legacy')
-rw-r--r--icing/legacy/index/icing-dynamic-trie.cc236
-rw-r--r--icing/legacy/index/icing-dynamic-trie.h61
-rw-r--r--icing/legacy/index/icing-dynamic-trie_test.cc197
3 files changed, 183 insertions, 311 deletions
diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc
index d74b14f..77876c4 100644
--- a/icing/legacy/index/icing-dynamic-trie.cc
+++ b/icing/legacy/index/icing-dynamic-trie.cc
@@ -73,7 +73,6 @@
#include <cstdint>
#include <cstring>
#include <memory>
-#include <optional>
#include <utility>
#include "icing/legacy/core/icing-packed-pod.h"
@@ -112,26 +111,6 @@ int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
}
return valid_nexts_length;
}
-
-// Get property id from filename.
-std::optional<uint32_t> GetPropertyIDFromFileName(const std::string &filename) {
- size_t property_id_start_idx = filename.rfind('.');
- if (property_id_start_idx == std::string::npos) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
- filename.c_str());
- return std::nullopt;
- }
- ++property_id_start_idx; // skip dot
- char *end;
- uint32_t property_id =
- strtol(filename.c_str() + property_id_start_idx, &end, 10); // NOLINT
- if (!end || end != (filename.c_str() + filename.size())) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
- filename.c_str());
- return std::nullopt;
- }
- return property_id;
-}
} // namespace
// Based on the bit field widths.
@@ -344,7 +323,7 @@ class IcingDynamicTrie::IcingDynamicTrieStorage {
uint32_t value_size() const { return hdr().value_size(); }
- void FillDirtyPageStats(LexiconDebugInfoProto *stats) const;
+ void FillDirtyPageStats(Stats *stats) const;
void inc_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() + 1); }
@@ -983,13 +962,10 @@ uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::suffixes_left() const {
}
void IcingDynamicTrie::IcingDynamicTrieStorage::FillDirtyPageStats(
- LexiconDebugInfoProto *stats) const {
- stats->mutable_node_info()->set_dirty_pages(
- array_storage_[NODE].num_dirty_pages());
- stats->mutable_next_info()->set_dirty_pages(
- array_storage_[NEXT].num_dirty_pages());
- stats->mutable_suffix_info()->set_dirty_pages(
- array_storage_[SUFFIX].num_dirty_pages());
+ Stats *stats) const {
+ stats->dirty_pages_nodes = array_storage_[NODE].num_dirty_pages();
+ stats->dirty_pages_nexts = array_storage_[NEXT].num_dirty_pages();
+ stats->dirty_pages_suffixes = array_storage_[SUFFIX].num_dirty_pages();
}
// Dumper.
@@ -1275,8 +1251,19 @@ bool IcingDynamicTrie::InitPropertyBitmaps() {
}
for (size_t i = 0; i < files.size(); i++) {
// Decode property id from filename.
- std::optional<uint32_t> property_id = GetPropertyIDFromFileName(files[i]);
- if (!property_id.has_value()) {
+ size_t property_id_start_idx = files[i].rfind('.');
+ if (property_id_start_idx == std::string::npos) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
+ files[i].c_str());
+ continue;
+ }
+ property_id_start_idx++; // skip dot
+ char *end;
+ uint32_t property_id =
+ strtol(files[i].c_str() + property_id_start_idx, &end, 10); // NOLINT
+ if (!end || end != (files[i].c_str() + files[i].size())) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
+ files[i].c_str());
continue;
}
std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap(
@@ -1290,9 +1277,9 @@ bool IcingDynamicTrie::InitPropertyBitmaps() {
}
bitmap->Truncate(truncate_idx);
if (property_id >= property_bitmaps_.size()) {
- property_bitmaps_.resize(*property_id + 1);
+ property_bitmaps_.resize(property_id + 1);
}
- property_bitmaps_[*property_id] = std::move(bitmap);
+ property_bitmaps_[property_id] = std::move(bitmap);
}
deleted_bitmap_ = OpenAndInitBitmap(
@@ -1380,24 +1367,19 @@ uint32_t IcingDynamicTrie::size() const {
return storage_->hdr().num_keys();
}
-void IcingDynamicTrie::CollectStatsRecursive(const Node &node,
- LexiconDebugInfoProto *stats,
+void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats,
uint32_t depth) const {
- LexiconDebugInfoProto::NodeInfo *node_info = stats->mutable_node_info();
- LexiconDebugInfoProto::NextInfo *next_info = stats->mutable_next_info();
- LexiconDebugInfoProto::SuffixInfo *suffix_info = stats->mutable_suffix_info();
if (node.is_leaf()) {
- node_info->set_num_leaves(node_info->num_leaves() + 1);
- node_info->set_sum_depth(node_info->sum_depth() + depth);
- node_info->set_max_depth(max(node_info->max_depth(), depth));
+ stats->num_leaves++;
+ stats->sum_depth += depth;
+ stats->max_depth = max(stats->max_depth, depth);
const char *suffix = storage_->GetSuffix(node.next_index());
- suffix_info->set_suffixes_used(suffix_info->suffixes_used() +
- strlen(suffix) + 1 + value_size());
- if (suffix[0] == '\0') {
- suffix_info->set_num_null_suffixes(suffix_info->num_null_suffixes() + 1);
+ stats->suffixes_used += strlen(suffix) + 1 + value_size();
+ if (!suffix[0]) {
+ stats->null_suffixes++;
}
} else {
- node_info->set_num_intermediates(node_info->num_intermediates() + 1);
+ stats->num_intermediates++;
uint32_t i = 0;
for (; i < (1U << node.log2_num_children()); i++) {
const Next &next = *storage_->GetNext(node.next_index(), i);
@@ -1410,42 +1392,30 @@ void IcingDynamicTrie::CollectStatsRecursive(const Node &node,
if (i == 0) {
ICING_LOG(FATAL) << "No valid node in 'next' array";
}
- node_info->set_sum_children(node_info->sum_children() + i);
- node_info->set_max_children(max(node_info->max_children(), i));
+ stats->sum_children += i;
+ stats->max_children = max(stats->max_children, i);
- if (next_info->child_counts_size() > 0) {
- next_info->set_child_counts(i - 1, next_info->child_counts(i - 1) + 1);
- }
- uint32_t wasted = (1 << node.log2_num_children()) - i;
- next_info->set_wasted(node.log2_num_children(),
- next_info->wasted(node.log2_num_children()) + wasted);
- next_info->set_total_wasted(next_info->total_wasted() + wasted);
+ stats->child_counts[i - 1]++;
+ stats->wasted[node.log2_num_children()] +=
+ (1 << node.log2_num_children()) - i;
+ stats->total_wasted += (1 << node.log2_num_children()) - i;
}
}
-void IcingDynamicTrie::CollectStats(LexiconDebugInfoProto *stats,
- int verbosity) const {
+void IcingDynamicTrie::CollectStats(Stats *stats) const {
if (!is_initialized()) {
ICING_LOG(FATAL) << "DynamicTrie not initialized";
}
- LexiconDebugInfoProto::NodeInfo *node_info = stats->mutable_node_info();
- LexiconDebugInfoProto::NextInfo *next_info = stats->mutable_next_info();
- LexiconDebugInfoProto::SuffixInfo *suffix_info = stats->mutable_suffix_info();
-
- if (verbosity > 0) {
- next_info->mutable_child_counts()->Resize(kMaxNextArraySize, 0);
- }
- next_info->mutable_wasted()->Resize(kNumNextAllocationBuckets, 0);
- next_info->mutable_num_free()->Resize(kNumNextAllocationBuckets, 0);
+ memset(stats, 0, sizeof(*stats));
- stats->set_num_keys(storage_->hdr().num_keys());
- node_info->set_num_nodes(storage_->hdr().num_nodes());
- node_info->set_max_nodes(storage_->hdr().max_nodes());
- next_info->set_num_nexts(storage_->hdr().num_nexts());
- next_info->set_max_nexts(storage_->hdr().max_nexts());
- suffix_info->set_suffixes_capacity(storage_->hdr().suffixes_size());
- suffix_info->set_max_suffixes_capacity(storage_->hdr().max_suffixes_size());
+ stats->num_keys = storage_->hdr().num_keys();
+ stats->num_nodes = storage_->hdr().num_nodes();
+ stats->max_nodes = storage_->hdr().max_nodes();
+ stats->num_nexts = storage_->hdr().num_nexts();
+ stats->max_nexts = storage_->hdr().max_nexts();
+ stats->suffixes_size = storage_->hdr().suffixes_size();
+ stats->max_suffixes_size = storage_->hdr().max_suffixes_size();
// Stats collected from traversing the trie.
if (!storage_->empty()) {
@@ -1456,23 +1426,80 @@ void IcingDynamicTrie::CollectStats(LexiconDebugInfoProto *stats,
for (int i = 0; i < kNumNextAllocationBuckets; i++) {
for (uint32_t cur = storage_->hdr().free_lists(i); cur != kInvalidNextIndex;
cur = storage_->GetNext(cur, 0)->next_index()) {
- next_info->set_num_free(i, next_info->num_free(i) + 1);
+ stats->num_free[i]++;
}
- next_info->set_total_free(next_info->total_free() +
- next_info->num_free(i) * (1 << i));
+ stats->total_free += stats->num_free[i] * (1 << i);
}
// Dirty page counts.
storage_->FillDirtyPageStats(stats);
+}
+
+std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const {
+ std::string ret;
+ IcingStringUtil::SStringAppendF(
+ &ret, 0,
+ "Keys %u "
+ "Nodes (%u/%u) %.3f%% "
+ "Nexts (%u/%u) %.3f%% "
+ "Suffixes (%u/%u) %.3f%%\n",
+ num_keys, num_nodes, max_nodes,
+ 100. * math_util::SafeDivide(num_nodes, max_nodes), num_nexts, max_nexts,
+ 100. * math_util::SafeDivide(num_nexts, max_nexts), suffixes_size,
+ max_suffixes_size,
+ 100. * math_util::SafeDivide(suffixes_size, max_suffixes_size));
+
+ if (verbosity > 0) {
+ for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+ if (num_free[i] > 0) {
+ IcingStringUtil::SStringAppendF(&ret, 0, "Freelist@%d: %u\n", 1 << i,
+ num_free[i]);
+ }
+ }
+ IcingStringUtil::SStringAppendF(
+ &ret, 0, "Freelist total: %u/%u %.3f%%\n", total_free, num_nexts,
+ 100. * math_util::SafeDivide(total_free, num_nexts));
- // Some helper calculations to provide better readability.
- node_info->set_avg_children(math_util::SafeDivide(
- node_info->sum_children(), node_info->num_intermediates()));
- node_info->set_avg_depth(
- math_util::SafeDivide(node_info->sum_depth(), node_info->num_leaves()));
- next_info->set_total_frag(math_util::SafeDivide(
- (next_info->total_free() + next_info->total_wasted()),
- next_info->num_nexts()));
+ for (int i = 0; i < 256; i++) {
+ if (child_counts[i] > 0) {
+ IcingStringUtil::SStringAppendF(&ret, 0, "Child count@%d: %u\n", i + 1,
+ child_counts[i]);
+ }
+ }
+ for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+ IcingStringUtil::SStringAppendF(&ret, 0, "Wasted@%d: %u\n", 1 << i,
+ wasted[i]);
+ }
+ IcingStringUtil::SStringAppendF(
+ &ret, 0,
+ "Wasted total: %u\n"
+ "Num intermediates %u num leaves %u "
+ "suffixes used %u null %u\n"
+ "avg and max children for intermediates: %.3f, %u\n"
+ "avg and max depth for leaves: %.3f, %u\n"
+ "Total next frag: %.3f%%\n",
+ total_wasted, num_intermediates, num_leaves, suffixes_used,
+ null_suffixes, 1. * sum_children / num_intermediates, max_children,
+ 1. * sum_depth / num_leaves, max_depth,
+ 100. * math_util::SafeDivide((total_free + total_wasted), num_nexts));
+ }
+ IcingStringUtil::SStringAppendF(
+ &ret, 0, "Memory usage: %zu/%zu bytes\n",
+ num_nodes * sizeof(Node) + num_nexts * sizeof(Next) + suffixes_size,
+ max_nodes * sizeof(Node) + max_nexts * sizeof(Next) + max_suffixes_size);
+
+ IcingStringUtil::SStringAppendF(
+ &ret, 0, "Dirty pages: nodes %u/%.0f nexts %u/%.0f suffixes %u/%.0f\n",
+ dirty_pages_nodes,
+ math_util::SafeDivide(num_nodes * sizeof(Node) + getpagesize() - 1,
+ getpagesize()),
+ dirty_pages_nexts,
+ math_util::SafeDivide(num_nexts * sizeof(Next) + getpagesize() - 1,
+ getpagesize()),
+ dirty_pages_suffixes,
+ math_util::SafeDivide(suffixes_size + getpagesize() - 1, getpagesize()));
+
+ return ret;
}
void IcingDynamicTrie::DumpTrie(std::ostream *pretty_print,
@@ -2248,42 +2275,29 @@ std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key,
return prefix_lengths;
}
-LexiconDebugInfoProto IcingDynamicTrie::GetDebugInfo(int verbosity) const {
- LexiconDebugInfoProto stats;
- CollectStats(&stats, verbosity);
-
- if (verbosity <= 0) {
- return stats;
- }
+void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
+ Stats stats;
+ CollectStats(&stats);
+ out->append(stats.DumpStats(verbosity));
- // Property files summary.
+ // Property files.
vector<std::string> files;
if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
&files)) {
ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
"Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
- return stats;
+ return;
}
- LexiconDebugInfoProto::PropertyBitmapInfo *deleted_bitmap =
- stats.add_property_bitmaps_info();
- deleted_bitmap->set_property_id(-1);
- deleted_bitmap->set_file_size(
- filesystem_->GetFileSize(deleted_bitmap_filename_.c_str()));
for (size_t i = 0; i < files.size(); i++) {
- LexiconDebugInfoProto::PropertyBitmapInfo *info =
- stats.add_property_bitmaps_info();
- std::optional<uint32_t> property_id = GetPropertyIDFromFileName(files[i]);
- if (!property_id.has_value()) {
- continue;
- }
- info->set_property_id(*property_id);
- info->set_file_size(filesystem_->GetFileSize(files[i].c_str()));
+ IcingStringUtil::SStringAppendF(
+ out, 1000, "Prop file %s size %" PRIu64 "\n",
+ filesystem_->GetBasename(files[i].c_str()).c_str(),
+ filesystem_->GetFileSize(files[i].c_str()));
}
- return stats;
-}
-
-void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
- *out = GetDebugInfo(verbosity).DebugString();
+ IcingStringUtil::SStringAppendF(
+ out, 1000, "Deleted file %s size %" PRIu64 "\n",
+ filesystem_->GetBasename(deleted_bitmap_filename_.c_str()).c_str(),
+ filesystem_->GetFileSize(deleted_bitmap_filename_.c_str()));
}
double IcingDynamicTrie::min_free_fraction() const {
diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h
index abb4f1a..013b926 100644
--- a/icing/legacy/index/icing-dynamic-trie.h
+++ b/icing/legacy/index/icing-dynamic-trie.h
@@ -47,7 +47,6 @@
#include "icing/legacy/index/icing-mmapper.h"
#include "icing/legacy/index/icing-storage.h"
#include "icing/legacy/index/proto/icing-dynamic-trie-header.pb.h"
-#include "icing/proto/debug.pb.h"
#include "icing/util/i18n-utils.h"
#include "unicode/utf8.h"
@@ -144,6 +143,58 @@ class IcingDynamicTrie : public IIcingStorage {
static const uint32_t kNoCrc = 0;
+ struct Stats {
+ uint32_t num_keys;
+
+ // Node stats
+
+ uint32_t num_nodes;
+ uint32_t max_nodes;
+ // Count of intermediate nodes.
+ uint32_t num_intermediates;
+ // Total and maximum number of children of intermediate nodes.
+ uint32_t sum_children, max_children;
+
+ // Count of leaf nodes.
+ uint32_t num_leaves;
+ // Total and maximum depth of leaf nodes.
+ uint32_t sum_depth, max_depth;
+
+ // Next stats
+
+ uint32_t num_nexts;
+ uint32_t max_nexts;
+ // Count of next arrays by size.
+ uint32_t child_counts[kMaxNextArraySize];
+ // Wasted next array space per allocation bucket (in Nexts, not
+ // bytes).
+ uint32_t wasted[kNumNextAllocationBuckets];
+ // Sum of wasted array.
+ uint32_t total_wasted;
+
+ // Suffix stats
+
+ uint32_t suffixes_size;
+ uint32_t max_suffixes_size;
+ // Bytes actually used by suffixes.
+ uint32_t suffixes_used;
+ // Number of suffixes that are just empty strings.
+ uint32_t null_suffixes;
+
+ // Next free-list stats
+ uint32_t num_free[kNumNextAllocationBuckets];
+ // Total Next nodes free (weighted sum of the above).
+ uint32_t total_free;
+
+ // Dirty pages.
+ uint32_t dirty_pages_nodes;
+ uint32_t dirty_pages_nexts;
+ uint32_t dirty_pages_suffixes;
+
+ // TODO(b/222349894) Convert the string output to a protocol buffer instead.
+ std::string DumpStats(int verbosity) const;
+ };
+
// Options when creating the trie. Maximums for the node/next/suffix
// arrays must be specified in advance.
struct Options {
@@ -230,7 +281,7 @@ class IcingDynamicTrie : public IIcingStorage {
uint32_t size() const;
// Collecting stats.
- void CollectStats(LexiconDebugInfoProto *stats, int verbosity) const;
+ void CollectStats(Stats *stats) const;
// Gets all of the contents of the trie for debugging purposes. Note: this
// stores the entire set of terms in memory.
@@ -349,10 +400,6 @@ class IcingDynamicTrie : public IIcingStorage {
// itself. If utf8 is true, does not cut key mid-utf8.
std::vector<int> FindBranchingPrefixLengths(const char *key, bool utf8) const;
- // Returns debug information for the dynamic trie.
- // verbosity <= 0, simplest debug information
- // verbosity > 0, more detailed debug information as indicated in debug.proto
- LexiconDebugInfoProto GetDebugInfo(int verbosity) const;
void GetDebugInfo(int verbosity, std::string *out) const override;
double min_free_fraction() const;
@@ -560,7 +607,7 @@ class IcingDynamicTrie : public IIcingStorage {
static const uint32_t kInvalidSuffixIndex;
// Stats helpers.
- void CollectStatsRecursive(const Node &node, LexiconDebugInfoProto *stats,
+ void CollectStatsRecursive(const Node &node, Stats *stats,
uint32_t depth = 0) const;
// Helpers for Find and Insert.
diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc
index d2cf48d..c8d1403 100644
--- a/icing/legacy/index/icing-dynamic-trie_test.cc
+++ b/icing/legacy/index/icing-dynamic-trie_test.cc
@@ -27,7 +27,6 @@
#include "gtest/gtest.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-filesystem.h"
-#include "icing/proto/debug.pb.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
@@ -35,12 +34,7 @@ namespace lib {
namespace {
-using ::testing::Each;
-using ::testing::ElementsAre;
-using ::testing::ElementsAreArray;
-using ::testing::Gt;
-using ::testing::IsEmpty;
-using ::testing::UnorderedElementsAre;
+using testing::ElementsAre;
constexpr std::string_view kKeys[] = {
"", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg",
@@ -62,8 +56,9 @@ class IcingDynamicTrieTest : public ::testing::Test {
// Output trie stats to stderr.
static void StatsDump(const IcingDynamicTrie& trie) {
- DLOG(INFO) << "Stats:\n"
- << trie.GetDebugInfo(/*verbosity=*/1).DebugString();
+ IcingDynamicTrie::Stats stats;
+ trie.CollectStats(&stats);
+ DLOG(INFO) << "Stats:\n" << stats.DumpStats(true);
}
static void AddToTrie(IcingDynamicTrie* trie, uint32_t num_keys) {
@@ -1138,189 +1133,5 @@ TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) {
ASSERT_EQ(0, trie.property_bitmaps_.size());
}
-TEST_F(IcingDynamicTrieTest, GetDebugInfo) {
- IcingFilesystem filesystem;
- IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
- &filesystem);
- IcingDynamicTrie::Options option;
- ASSERT_TRUE(trie.CreateIfNotExist(option));
- ASSERT_TRUE(trie.Init());
-
- uint32_t unused = 0;
- ASSERT_TRUE(trie.Insert("", &unused));
- uint32_t val_idx;
- ASSERT_TRUE(trie.Insert("ab", &unused, &val_idx, false));
- trie.SetProperty(val_idx, 0);
- ASSERT_TRUE(trie.Insert("ac", &unused));
- ASSERT_TRUE(trie.Insert("abd", &unused));
- ASSERT_TRUE(trie.Insert("bac", &unused));
- ASSERT_TRUE(trie.Insert("bb", &unused));
- ASSERT_TRUE(trie.Insert("bacd", &unused));
- ASSERT_TRUE(trie.Insert("abbb", &unused));
- ASSERT_TRUE(trie.Insert("abcdefg", &unused));
- StatsDump(trie);
-
- LexiconDebugInfoProto info0 = trie.GetDebugInfo(/*verbosity=*/1);
- EXPECT_EQ(info0.num_keys(), 9);
- EXPECT_EQ(info0.node_info().num_nodes(), 15);
- EXPECT_EQ(info0.node_info().max_nodes(), option.max_nodes);
- EXPECT_EQ(info0.node_info().num_intermediates(), 6);
- EXPECT_EQ(info0.node_info().sum_children(), 14);
- EXPECT_EQ(info0.node_info().max_children(), 4);
- EXPECT_EQ(info0.node_info().avg_children(), (float)14 / 6);
- EXPECT_EQ(info0.node_info().num_leaves(), 9);
- EXPECT_EQ(info0.node_info().sum_depth(), 25);
- EXPECT_EQ(info0.node_info().max_depth(), 4);
- EXPECT_EQ(info0.node_info().avg_depth(), (float)25 / 9);
-
- EXPECT_EQ(info0.next_info().num_nexts(), 17);
- EXPECT_EQ(info0.next_info().max_nexts(), option.max_nexts);
- uint32_t exp_child_counts[IcingDynamicTrie::kMaxNextArraySize] = {1, 3, 1, 1};
- EXPECT_THAT(info0.next_info().child_counts(),
- ElementsAreArray(exp_child_counts));
- EXPECT_THAT(info0.next_info().wasted(),
- ElementsAre(0, 0, 1, 0, 0, 0, 0, 0, 0));
- EXPECT_EQ(info0.next_info().total_wasted(), 1);
- EXPECT_THAT(info0.next_info().num_free(),
- ElementsAre(0, 1, 0, 0, 0, 0, 0, 0, 0));
- EXPECT_EQ(info0.next_info().total_free(), 2);
- EXPECT_EQ(info0.next_info().total_frag(), (float)(2 + 1) / 17);
-
- EXPECT_THAT(info0.suffix_info().suffixes_capacity(), Gt(0));
- EXPECT_EQ(info0.suffix_info().max_suffixes_capacity(),
- option.max_suffixes_size);
- EXPECT_THAT(info0.suffix_info().suffixes_used(), Gt(0));
- EXPECT_EQ(info0.suffix_info().num_null_suffixes(), 7);
-
- EXPECT_EQ(info0.node_info().dirty_pages(), 1);
- EXPECT_EQ(info0.next_info().dirty_pages(), 1);
- EXPECT_EQ(info0.suffix_info().dirty_pages(), 1);
-
- std::vector<uint32_t> property_ids;
- for (int i = 0; i < info0.property_bitmaps_info_size(); i++) {
- property_ids.push_back(info0.property_bitmaps_info(i).property_id());
- }
- EXPECT_THAT(property_ids, UnorderedElementsAre(-1, 0));
-
- LexiconDebugInfoProto info1 = trie.GetDebugInfo(/*verbosity=*/0);
- EXPECT_THAT(info1.next_info().child_counts(), IsEmpty());
- EXPECT_THAT(info1.property_bitmaps_info(), IsEmpty());
-}
-
-TEST_F(IcingDynamicTrieTest, GetDebugInfoForEmptyTrie) {
- IcingFilesystem filesystem;
- IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
- &filesystem);
- IcingDynamicTrie::Options option;
- ASSERT_TRUE(trie.CreateIfNotExist(option));
- ASSERT_TRUE(trie.Init());
-
- LexiconDebugInfoProto info = trie.GetDebugInfo(/*verbosity=*/1);
- EXPECT_EQ(info.num_keys(), 0);
- EXPECT_EQ(info.node_info().num_nodes(), 0);
- EXPECT_EQ(info.node_info().max_nodes(), option.max_nodes);
- EXPECT_EQ(info.node_info().num_intermediates(), 0);
- EXPECT_EQ(info.node_info().sum_children(), 0);
- EXPECT_EQ(info.node_info().max_children(), 0);
- EXPECT_EQ(info.node_info().num_leaves(), 0);
- EXPECT_EQ(info.node_info().sum_depth(), 0);
- EXPECT_EQ(info.node_info().max_depth(), 0);
-
- EXPECT_EQ(info.next_info().num_nexts(), 0);
- EXPECT_EQ(info.next_info().max_nexts(), option.max_nexts);
- EXPECT_THAT(info.next_info().child_counts(), Each(0));
- EXPECT_THAT(info.next_info().wasted(), Each(0));
- EXPECT_EQ(info.next_info().total_wasted(), 0);
- EXPECT_THAT(info.next_info().num_free(), Each(0));
- EXPECT_EQ(info.next_info().total_free(), 0);
-
- EXPECT_EQ(info.suffix_info().suffixes_capacity(), 0);
- EXPECT_EQ(info.suffix_info().max_suffixes_capacity(),
- option.max_suffixes_size);
- EXPECT_EQ(info.suffix_info().suffixes_used(), 0);
- EXPECT_EQ(info.suffix_info().num_null_suffixes(), 0);
-
- EXPECT_EQ(info.node_info().dirty_pages(), 0);
- EXPECT_EQ(info.next_info().dirty_pages(), 0);
- EXPECT_EQ(info.suffix_info().dirty_pages(), 0);
-}
-
-TEST_F(IcingDynamicTrieTest, GetDebugInfoCorrectForWastedAndChildCounts) {
- IcingFilesystem filesystem;
- IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
- &filesystem);
- ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
- ASSERT_TRUE(trie.Init());
-
- uint32_t unused = 0;
- ASSERT_TRUE(trie.Insert("", &unused));
- ASSERT_TRUE(trie.Insert("ab", &unused));
- PrintTrie(trie);
- LexiconDebugInfoProto::NextInfo info =
- trie.GetDebugInfo(/*verbosity=*/1).next_info();
- // Root has 2 children "" and "ab", which are leaves. No wasted.
- EXPECT_THAT(info.wasted(), Each(0));
- uint32_t exp_child_counts1[IcingDynamicTrie::kMaxNextArraySize] = {0, 1};
- EXPECT_THAT(info.child_counts(), ElementsAreArray(exp_child_counts1));
-
- ASSERT_TRUE(trie.Insert("ac", &unused));
- PrintTrie(trie);
- info = trie.GetDebugInfo(/*verbosity=*/1).next_info();
- // Root has 2 children "" and "a".
- // "a" has 2 children "b" and "c".
- // No wasted.
- EXPECT_THAT(info.wasted(), Each(0));
- uint32_t exp_child_counts2[IcingDynamicTrie::kMaxNextArraySize] = {0, 2};
- EXPECT_THAT(info.child_counts(), ElementsAreArray(exp_child_counts2));
-
- ASSERT_TRUE(trie.Insert("ad", &unused));
- PrintTrie(trie);
- info = trie.GetDebugInfo(/*verbosity=*/1).next_info();
- // Root has 2 children "" and "a".
- // "a" has 3 children "b", "c", and "d".
- // 1 next wasted for "a", since 2^2 - 3 = 1
- EXPECT_THAT(info.wasted(), ElementsAre(0, 0, 1, 0, 0, 0, 0, 0, 0));
- uint32_t exp_child_counts3[IcingDynamicTrie::kMaxNextArraySize] = {0, 1, 1};
- EXPECT_THAT(info.child_counts(), ElementsAreArray(exp_child_counts3));
-}
-
-TEST_F(IcingDynamicTrieTest, GetDebugInfoCorrectForFreeList) {
- IcingFilesystem filesystem;
- IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
- &filesystem);
- ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
- ASSERT_TRUE(trie.Init());
-
- uint32_t unused = 0;
- ASSERT_TRUE(trie.Insert("", &unused));
- ASSERT_TRUE(trie.Insert("ab", &unused));
- ASSERT_TRUE(trie.Insert("ac", &unused));
- // No next arrays are freed yet.
- LexiconDebugInfoProto::NextInfo info =
- trie.GetDebugInfo(/*verbosity=*/0).next_info();
- EXPECT_THAT(info.num_free(), Each(0));
- EXPECT_EQ(info.total_free(), 0);
-
- ASSERT_TRUE(trie.Insert("ad", &unused));
- info = trie.GetDebugInfo(/*verbosity=*/0).next_info();
- // The next array of "a" with size 2 has been freed in order to expand to
- // size 4.
- EXPECT_THAT(info.num_free(), ElementsAre(0, 1, 0, 0, 0, 0, 0, 0, 0));
- EXPECT_EQ(info.total_free(), 2);
-
- ASSERT_TRUE(trie.Insert("ae", &unused));
- info = trie.GetDebugInfo(/*verbosity=*/0).next_info();
- // No change
- EXPECT_THAT(info.num_free(), ElementsAre(0, 1, 0, 0, 0, 0, 0, 0, 0));
- EXPECT_EQ(info.total_free(), 2);
-
- ASSERT_TRUE(trie.Insert("af", &unused));
- info = trie.GetDebugInfo(/*verbosity=*/0).next_info();
- // The next array of "a" with size 4 has been freed in order to expand to
- // size 8.
- EXPECT_THAT(info.num_free(), ElementsAre(0, 1, 1, 0, 0, 0, 0, 0, 0));
- EXPECT_EQ(info.total_free(), 2 + 4);
-}
-
} // namespace lib
} // namespace icing