aboutsummaryrefslogtreecommitdiff
path: root/icing/legacy
diff options
context:
space:
mode:
authorJiayu Hu <hujiayu@google.com>2022-06-15 18:01:29 -0700
committerJiayu Hu <hujiayu@google.com>2022-06-15 18:13:09 -0700
commit7c93c404e1fb4ed5e35326245ebc820ed774c6b2 (patch)
treefe159041f4c9358b9fd09b0e83ff551ff26b67a0 /icing/legacy
parente3dfbf68537a9865815a55623a92aed26bc80eb4 (diff)
downloadicing-7c93c404e1fb4ed5e35326245ebc820ed774c6b2.tar.gz
Sync from upstream.
Descriptions: ====================================================================== Export Icing logging tag to JNI ====================================================================== Update export_to_aosp.sh to change icing log tag to "AppSearchIcing" ====================================================================== Improve the logic of NamespaceChecker. ====================================================================== Step 4.1: Use ScoredDocumentHitsRanker in ResultStateV2 ====================================================================== Step 4.0: Create ScoredDocumentHitsRanker interface and PriorityQueueScoredDocumentHitsRanker ====================================================================== Refactor KeyMapper ====================================================================== Change Icing DEFAULT_LOGGING_LEVEL to INFO ====================================================================== Step 3.4: Create ResultRetrieverV2GroupResultLimiterTest (copied from ResultStateTest) ====================================================================== Add IcingDynamicTrie::IsBranchingTerm to check if a term is branching. ====================================================================== Fix IcingDynamicTrie::Delete bug ====================================================================== Step 3.3: Create ResultRetrieverV2ProjectionTest (copied from ResultRetrieverTest) ====================================================================== Step 3.2: Create ResultRetrieverV2SnippetTest (copied from ResultRetrieverTest) ====================================================================== Step 3.1: Create ResultRetrieverV2Test (copied from ResultRetrieverTest) ====================================================================== Enable legacy multidex ====================================================================== Fix NPE caused by improper handling of return value of GetFileSize. ====================================================================== Step 3.0: Create ResultRetrieverV2 (copied from ResultRetriever) ====================================================================== Step 2: Create PageResult (copied from PageResultState) ====================================================================== Change AppSearch hawkeye testing app multidex to legacy. ====================================================================== Fix icing-search-engine_benchmark bug ====================================================================== (Small fix for step 1) Fix unit test stack memory error ====================================================================== Step 1: Create ResultStateV2 and ResultStateV2Test (copied from ResultState, ResultStateTest) ====================================================================== Bug: 146903474 Bug: 152934343 Bug: 193919210 Bug: 231368517 Bug: 232273174 Bug: 233470404 Bug: 233657885 Change-Id: Iae5461f8650c2bd58f683128fdd243d90403bc06
Diffstat (limited to 'icing/legacy')
-rw-r--r--icing/legacy/index/icing-dynamic-trie.cc93
-rw-r--r--icing/legacy/index/icing-dynamic-trie.h17
-rw-r--r--icing/legacy/index/icing-dynamic-trie_test.cc219
3 files changed, 308 insertions, 21 deletions
diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc
index 77876c4..4428599 100644
--- a/icing/legacy/index/icing-dynamic-trie.cc
+++ b/icing/legacy/index/icing-dynamic-trie.cc
@@ -101,15 +101,9 @@ namespace {
constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1;
constexpr uint32_t kInvalidNextIndex = ~0U;
-// Returns the number of valid nexts in the array.
-int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
- int next_array_length) {
- int valid_nexts_length = 0;
- for (; valid_nexts_length < next_array_length &&
- next_array_start[valid_nexts_length].node_index() != kInvalidNodeIndex;
- ++valid_nexts_length) {
- }
- return valid_nexts_length;
+void ResetMutableNext(IcingDynamicTrie::Next &mutable_next) {
+ mutable_next.set_val(0xff);
+ mutable_next.set_node_index(kInvalidNodeIndex);
}
} // namespace
@@ -769,8 +763,7 @@ IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
// Fill with char 0xff so we are sorted properly.
for (int i = 0; i < aligned_size; i++) {
- ret[i].set_val(0xff);
- ret[i].set_node_index(kInvalidNodeIndex);
+ ResetMutableNext(ret[i]);
}
return ret;
}
@@ -1550,9 +1543,7 @@ bool IcingDynamicTrie::ResetNext(uint32_t next_index) {
if (mutable_next == nullptr) {
return false;
}
-
- mutable_next->set_val(0);
- mutable_next->set_node_index(kInvalidNodeIndex);
+ ResetMutableNext(*mutable_next);
return true;
}
@@ -1570,7 +1561,7 @@ bool IcingDynamicTrie::SortNextArray(const Node *node) {
return false;
}
- std::sort(next_array_start, next_array_start + next_array_buffer_size - 1);
+ std::sort(next_array_start, next_array_start + next_array_buffer_size);
return true;
}
@@ -2116,22 +2107,33 @@ const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
return found;
}
+int IcingDynamicTrie::GetValidNextsSize(
+ IcingDynamicTrie::Next *next_array_start, int next_array_length) const {
+ // Only searching for key char 0xff is not sufficient, as 0xff can be a valid
+ // character. We must also specify kInvalidNodeIndex as the target node index
+ // when searching the next array.
+ return LowerBound(next_array_start, next_array_start + next_array_length,
+ /*key_char=*/0xff, /*node_index=*/kInvalidNodeIndex) -
+ next_array_start;
+}
+
const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
- const Next *start, const Next *end, uint8_t key_char) const {
+ const Next *start, const Next *end, uint8_t key_char,
+ uint32_t node_index) const {
// Above this value will use binary search instead of linear
// search. 16 was chosen from running some benchmarks with
// different values.
static const uint32_t kBinarySearchCutoff = 16;
+ Next key_next(key_char, node_index);
if (end - start >= kBinarySearchCutoff) {
// Binary search.
- Next key_next(key_char, 0);
return lower_bound(start, end, key_next);
} else {
// Linear search.
const Next *found;
for (found = start; found < end; found++) {
- if (found->val() >= key_char) {
+ if (!(*found < key_next)) {
// Should have gotten match.
break;
}
@@ -2275,6 +2277,40 @@ std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key,
return prefix_lengths;
}
+bool IcingDynamicTrie::IsBranchingTerm(const char *key) const {
+ if (!is_initialized()) {
+ ICING_LOG(FATAL) << "DynamicTrie not initialized";
+ }
+
+ if (storage_->empty()) {
+ return false;
+ }
+
+ uint32_t best_node_index;
+ int key_offset;
+ FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true);
+ const Node *cur_node = storage_->GetNode(best_node_index);
+
+ if (cur_node->is_leaf()) {
+ return false;
+ }
+
+ // key is not present in the trie.
+ if (key[key_offset] != '\0') {
+ return false;
+ }
+
+ // Found key as an intermediate node, but key is not a valid term stored in
+ // the trie.
+ if (GetNextByChar(cur_node, '\0') == nullptr) {
+ return false;
+ }
+
+ // The intermediate node for key must have more than two children for key to
+ // be a branching term, one of which represents the leaf node for key itself.
+ return cur_node->log2_num_children() > 1;
+}
+
void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
Stats stats;
CollectStats(&stats);
@@ -2500,7 +2536,26 @@ bool IcingDynamicTrie::Delete(const std::string_view key) {
for (uint32_t next_index : nexts_to_reset) {
ResetNext(next_index);
}
- SortNextArray(last_multichild_node);
+
+ if (last_multichild_node != nullptr) {
+ SortNextArray(last_multichild_node);
+ uint32_t next_array_buffer_size =
+ 1u << last_multichild_node->log2_num_children();
+ Next *next_array_start = this->storage_->GetMutableNextArray(
+ last_multichild_node->next_index(), next_array_buffer_size);
+ uint32_t num_children =
+ GetValidNextsSize(next_array_start, next_array_buffer_size);
+ // Shrink the next array if we can.
+ if (num_children == next_array_buffer_size / 2) {
+ Node *mutable_node = storage_->GetMutableNode(
+ storage_->GetNodeIndex(last_multichild_node));
+ mutable_node->set_log2_num_children(mutable_node->log2_num_children() -
+ 1);
+ // Add the unused second half of the next array to the free list.
+ storage_->FreeNextArray(next_array_start + next_array_buffer_size / 2,
+ mutable_node->log2_num_children());
+ }
+ }
return true;
}
diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h
index 013b926..ec8b31a 100644
--- a/icing/legacy/index/icing-dynamic-trie.h
+++ b/icing/legacy/index/icing-dynamic-trie.h
@@ -400,6 +400,16 @@ class IcingDynamicTrie : public IIcingStorage {
// itself. If utf8 is true, does not cut key mid-utf8.
std::vector<int> FindBranchingPrefixLengths(const char *key, bool utf8) const;
+ // Check if key is a branching term.
+ //
+ // key is a branching term, if and only if there exists terms s1 and s2 in the
+ // trie such that key is the maximum common prefix of s1 and s2, but s1 and s2
+ // are not prefixes of each other.
+ //
+ // The function assumes that key is already present in the trie. Otherwise,
+ // false will be returned.
+ bool IsBranchingTerm(const char *key) const;
+
void GetDebugInfo(int verbosity, std::string *out) const override;
double min_free_fraction() const;
@@ -612,8 +622,11 @@ class IcingDynamicTrie : public IIcingStorage {
// Helpers for Find and Insert.
const Next *GetNextByChar(const Node *node, uint8_t key_char) const;
- const Next *LowerBound(const Next *start, const Next *end,
- uint8_t key_char) const;
+ const Next *LowerBound(const Next *start, const Next *end, uint8_t key_char,
+ uint32_t node_index = 0) const;
+ // Returns the number of valid nexts in the array.
+ int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
+ int next_array_length) const;
void FindBestNode(const char *key, uint32_t *best_node_index, int *key_offset,
bool prefix, bool utf8 = false) const;
diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc
index c8d1403..b69ee64 100644
--- a/icing/legacy/index/icing-dynamic-trie_test.cc
+++ b/icing/legacy/index/icing-dynamic-trie_test.cc
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
@@ -27,13 +28,16 @@
#include "gtest/gtest.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/testing/random-string.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/util/logging.h"
namespace icing {
namespace lib {
namespace {
+using testing::ContainerEq;
using testing::ElementsAre;
constexpr std::string_view kKeys[] = {
@@ -962,6 +966,102 @@ TEST_F(IcingDynamicTrieTest, DeletingNonExistingKeyShouldReturnTrue) {
EXPECT_TRUE(trie.Find("bed", &value));
}
+TEST_F(IcingDynamicTrieTest, DeletionResortsFullNextArray) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+ // 'f' -> [ 'a', 'j', 'o', 'u' ]
+ ASSERT_TRUE(trie.Insert("foul", &value));
+ ASSERT_TRUE(trie.Insert("far", &value));
+ ASSERT_TRUE(trie.Insert("fudge", &value));
+ ASSERT_TRUE(trie.Insert("fjord", &value));
+
+ // Delete the third child
+ EXPECT_TRUE(trie.Delete("foul"));
+
+ std::vector<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.push_back(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ElementsAre("far", "fjord", "fudge"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionResortsPartiallyFilledNextArray) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+ // 'f' -> [ 'a', 'o', 'u', 0xFF ]
+ ASSERT_TRUE(trie.Insert("foul", &value));
+ ASSERT_TRUE(trie.Insert("far", &value));
+ ASSERT_TRUE(trie.Insert("fudge", &value));
+
+ // Delete the second child
+ EXPECT_TRUE(trie.Delete("foul"));
+
+ std::vector<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.push_back(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ElementsAre("far", "fudge"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionLoadTest) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ std::default_random_engine random;
+ ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed;
+ std::vector<std::string> terms;
+ uint32_t value;
+ // Randomly generate 2048 terms.
+ for (int i = 0; i < 2048; ++i) {
+ terms.push_back(RandomString("abcdefg", 5, &random));
+ ASSERT_TRUE(trie.Insert(terms.back().c_str(), &value));
+ }
+
+ // Randomly delete 1024 terms.
+ std::unordered_set<std::string> exp_remaining(terms.begin(), terms.end());
+ std::shuffle(terms.begin(), terms.end(), random);
+ for (int i = 0; i < 1024; ++i) {
+ exp_remaining.erase(terms[i]);
+ ASSERT_TRUE(trie.Delete(terms[i].c_str()));
+ }
+
+ // Check that the iterator still works, and the remaining terms are correct.
+ std::unordered_set<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.insert(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ContainerEq(exp_remaining));
+
+ // Check that we can still insert terms after delete.
+ for (int i = 0; i < 2048; ++i) {
+ std::string term = RandomString("abcdefg", 5, &random);
+ ASSERT_TRUE(trie.Insert(term.c_str(), &value));
+ exp_remaining.insert(term);
+ }
+ remaining.clear();
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.insert(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ContainerEq(exp_remaining));
+}
+
} // namespace
// The tests below are accessing private methods and fields of IcingDynamicTrie
@@ -1133,5 +1233,124 @@ TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) {
ASSERT_EQ(0, trie.property_bitmaps_.size());
}
+TEST_F(IcingDynamicTrieTest, IsBranchingTerm) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+
+ ASSERT_TRUE(trie.Insert("", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+
+ ASSERT_TRUE(trie.Insert("ab", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+
+ ASSERT_TRUE(trie.Insert("ac", &value));
+ // "" is a prefix of "ab" and "ac", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+
+ ASSERT_TRUE(trie.Insert("ba", &value));
+ // "" now branches to "ba"
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+
+ ASSERT_TRUE(trie.Insert("a", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ // "a" branches to "ab" and "ac"
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+
+ ASSERT_TRUE(trie.Insert("abc", &value));
+ ASSERT_TRUE(trie.Insert("acd", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" is a prefix of "abc", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ // "ac" is a prefix of "acd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+
+ ASSERT_TRUE(trie.Insert("abcd", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" is a prefix of "abc" and "abcd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ // "abc" is a prefix of "abcd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
+
+ ASSERT_TRUE(trie.Insert("abd", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" branches to "abc" and "abd"
+ EXPECT_TRUE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abd"));
+}
+
+TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForNonExistingTerms) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+
+ ASSERT_TRUE(trie.Insert("aa", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("ab", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("ac", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("ad", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("abcd", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_TRUE(trie.Insert("abce", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_TRUE(trie.Insert("abcf", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_TRUE(trie.Insert("abc_suffix", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_s"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_su"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_suffi"));
+}
+
} // namespace lib
} // namespace icing