aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-04-19 18:08:47 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-04-19 18:08:47 +0000
commitb5d722c7687592b47cdb2e2a18377df45aca144e (patch)
tree05b131543d5192b4ca175830f2fb507899298a05
parentbc49e6f3d44af3611a5edc02ead7507f1ce1d431 (diff)
parenta81a0c8cb5c424e6eed6cec1bd687bc4d609e6cd (diff)
downloadicing-androidx-lifecycle-release.tar.gz
Snap for 11739378 from a81a0c8cb5c424e6eed6cec1bd687bc4d609e6cd to androidx-lifecycle-releaseandroidx-lifecycle-release
Change-Id: I430307224fdc13e426fcfcd90468567d316c7946
-rw-r--r--icing/file/file-backed-vector_benchmark.cc2
-rw-r--r--icing/file/persistent-hash-map.cc2
-rw-r--r--icing/file/portable-file-backed-proto-log_test.cc3
-rw-r--r--icing/file/posting_list/flash-index-storage.cc8
-rw-r--r--icing/file/posting_list/flash-index-storage_test.cc9
-rw-r--r--icing/file/posting_list/index-block_test.cc2
-rw-r--r--icing/file/posting_list/posting-list-accessor.cc11
-rw-r--r--icing/file/posting_list/posting-list-identifier.h2
-rw-r--r--icing/file/version-util.cc4
-rw-r--r--icing/file/version-util.h15
-rw-r--r--icing/file/version-util_test.cc12
-rw-r--r--icing/icing-search-engine.cc325
-rw-r--r--icing/icing-search-engine.h34
-rw-r--r--icing/icing-search-engine_benchmark.cc17
-rw-r--r--icing/icing-search-engine_initialization_test.cc676
-rw-r--r--icing/icing-search-engine_optimize_test.cc12
-rw-r--r--icing/icing-search-engine_schema_test.cc23
-rw-r--r--icing/icing-search-engine_search_test.cc1134
-rw-r--r--icing/index/index-processor_benchmark.cc21
-rw-r--r--icing/index/index-processor_test.cc79
-rw-r--r--icing/index/index.cc8
-rw-r--r--icing/index/index.h8
-rw-r--r--icing/index/index_test.cc222
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-all-document-id.cc1
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-all-document-id.h12
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc44
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and.cc34
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and.h26
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and_test.cc154
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.cc11
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.h7
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter_test.cc24
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-none.h4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not.cc14
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not.h13
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not_test.cc45
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or.cc35
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or.h23
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or_test.cc156
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-document.cc65
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-document.h73
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc12
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema.h7
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc5
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc230
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.h100
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc230
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-test-util.h26
-rw-r--r--icing/index/iterator/doc-hit-info-iterator.h148
-rw-r--r--icing/index/iterator/section-restrict-data.cc82
-rw-r--r--icing/index/iterator/section-restrict-data.h98
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.cc3
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.h12
-rw-r--r--icing/index/lite/lite-index-header.h25
-rw-r--r--icing/index/lite/lite-index-options.cc16
-rw-r--r--icing/index/lite/lite-index-options.h7
-rw-r--r--icing/index/lite/lite-index.cc6
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.cc9
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.h12
-rw-r--r--icing/index/main/posting-list-hit-serializer.cc5
-rw-r--r--icing/index/main/posting-list-hit-serializer.h15
-rw-r--r--icing/index/main/posting-list-hit-serializer_test.cc56
-rw-r--r--icing/index/numeric/doc-hit-info-iterator-numeric.h16
-rw-r--r--icing/index/numeric/dummy-numeric-index.h11
-rw-r--r--icing/index/numeric/integer-index-storage.cc49
-rw-r--r--icing/index/numeric/integer-index-storage_test.cc125
-rw-r--r--icing/index/numeric/integer-index.cc2
-rw-r--r--icing/index/numeric/integer-index_test.cc137
-rw-r--r--icing/index/numeric/numeric-index.h5
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.cc7
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.h6
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer_test.cc24
-rw-r--r--icing/index/property-existence-indexing-handler.cc127
-rw-r--r--icing/index/property-existence-indexing-handler.h86
-rw-r--r--icing/index/property-existence-indexing-handler_test.cc524
-rw-r--r--icing/index/string-section-indexing-handler.cc68
-rw-r--r--icing/index/string-section-indexing-handler.h28
-rw-r--r--icing/index/term-indexing-handler.cc146
-rw-r--r--icing/index/term-indexing-handler.h97
-rw-r--r--icing/index/term-indexing-handler_test.cc (renamed from icing/index/string-section-indexing-handler_test.cc)109
-rw-r--r--icing/join/document-id-to-join-info.h67
-rw-r--r--icing/join/join-processor.cc128
-rw-r--r--icing/join/join-processor_test.cc399
-rw-r--r--icing/join/posting-list-join-data-accessor.h211
-rw-r--r--icing/join/posting-list-join-data-accessor_test.cc435
-rw-r--r--icing/join/posting-list-join-data-serializer.h803
-rw-r--r--icing/join/posting-list-join-data-serializer_test.cc653
-rw-r--r--icing/join/qualified-id-join-index-impl-v1.cc (renamed from icing/join/qualified-id-join-index.cc)81
-rw-r--r--icing/join/qualified-id-join-index-impl-v1.h327
-rw-r--r--icing/join/qualified-id-join-index-impl-v1_test.cc (renamed from icing/join/qualified-id-join-index_test.cc)421
-rw-r--r--icing/join/qualified-id-join-index-impl-v2.cc681
-rw-r--r--icing/join/qualified-id-join-index-impl-v2.h369
-rw-r--r--icing/join/qualified-id-join-index-impl-v2_test.cc1414
-rw-r--r--icing/join/qualified-id-join-index.h275
-rw-r--r--icing/join/qualified-id-join-indexing-handler-v1_test.cc558
-rw-r--r--icing/join/qualified-id-join-indexing-handler.cc117
-rw-r--r--icing/join/qualified-id-join-indexing-handler.h12
-rw-r--r--icing/join/qualified-id-join-indexing-handler_test.cc591
-rw-r--r--icing/legacy/index/icing-dynamic-trie_test.cc10
-rw-r--r--icing/monkey_test/icing-monkey-test-runner.cc105
-rw-r--r--icing/monkey_test/icing-monkey-test-runner.h46
-rw-r--r--icing/monkey_test/icing-search-engine_monkey_test.cc19
-rw-r--r--icing/monkey_test/in-memory-icing-search-engine.cc133
-rw-r--r--icing/monkey_test/in-memory-icing-search-engine.h33
-rw-r--r--icing/monkey_test/monkey-test-generators.cc287
-rw-r--r--icing/monkey_test/monkey-test-generators.h84
-rw-r--r--icing/monkey_test/monkey-test-util.h68
-rw-r--r--icing/monkey_test/monkey-tokenized-document.h3
-rw-r--r--icing/portable/platform.h14
-rw-r--r--icing/query/advanced_query_parser/parser.cc24
-rw-r--r--icing/query/advanced_query_parser/query-visitor.cc43
-rw-r--r--icing/query/advanced_query_parser/query-visitor.h14
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc1017
-rw-r--r--icing/query/query-features.h6
-rw-r--r--icing/query/query-processor.cc11
-rw-r--r--icing/query/query-processor_test.cc145
-rw-r--r--icing/schema/schema-store.cc8
-rw-r--r--icing/schema/schema-util.cc58
-rw-r--r--icing/schema/schema-util.h16
-rw-r--r--icing/schema/schema-util_test.cc111
-rw-r--r--icing/scoring/scoring-processor.cc17
-rw-r--r--icing/scoring/scoring-processor.h8
-rw-r--r--icing/store/document-store.cc158
-rw-r--r--icing/store/document-store.h58
-rw-r--r--icing/store/document-store_benchmark.cc5
-rw-r--r--icing/store/document-store_test.cc326
-rw-r--r--icing/store/namespace-fingerprint-identifier.cc73
-rw-r--r--icing/store/namespace-fingerprint-identifier.h72
-rw-r--r--icing/store/namespace-fingerprint-identifier_test.cc148
-rw-r--r--icing/store/usage-store_test.cc38
-rw-r--r--icing/testing/common-matchers.h34
-rw-r--r--icing/tokenization/combined-tokenizer_test.cc2
-rw-r--r--icing/tokenization/icu/icu-language-segmenter_test.cc13
-rw-r--r--icing/tokenization/raw-query-tokenizer_test.cc2
-rw-r--r--icing/transform/icu/icu-normalizer.cc1
-rw-r--r--icing/transform/icu/icu-normalizer.h3
-rw-r--r--icing/transform/icu/icu-normalizer_benchmark.cc47
-rw-r--r--icing/transform/icu/icu-normalizer_test.cc79
-rw-r--r--icing/util/document-validator.cc2
-rw-r--r--icing/util/i18n-utils.cc4
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngine.java2
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngineImpl.java2
-rw-r--r--proto/icing/proto/initialize.proto11
-rw-r--r--proto/icing/proto/logging.proto80
-rw-r--r--proto/icing/proto/optimize.proto8
-rw-r--r--synced_AOSP_CL_number.txt2
146 files changed, 14380 insertions, 2653 deletions
diff --git a/icing/file/file-backed-vector_benchmark.cc b/icing/file/file-backed-vector_benchmark.cc
index b2e660b..0447e93 100644
--- a/icing/file/file-backed-vector_benchmark.cc
+++ b/icing/file/file-backed-vector_benchmark.cc
@@ -68,7 +68,7 @@ void BM_Set(benchmark::State& state) {
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
// Extend to num_elts
- fbv->Set(num_elts - 1, 0);
+ ICING_ASSERT_OK(fbv->Set(num_elts - 1, 0));
std::uniform_int_distribution<> distrib(0, num_elts - 1);
for (auto _ : state) {
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 558c242..6936c45 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -716,7 +716,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
// # of vector elements may be greater than the actual # of entries.
// Therefore, we have to truncate entry_storage_ to the correct size.
if (entry_idx < entry_storage_->num_elements()) {
- entry_storage_->TruncateTo(entry_idx);
+ ICING_RETURN_IF_ERROR(entry_storage_->TruncateTo(entry_idx));
}
info().num_deleted_entries = 0;
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index bf5e604..cc70151 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -1124,7 +1124,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
// document1_offset + sizeof(int) is the start byte of the proto where
// sizeof(int) is the size of the proto metadata.
- mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1);
+ ICING_ASSERT_OK(
+ mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1));
for (size_t i = 0; i < mmapped_file.region_size(); ++i) {
ASSERT_THAT(mmapped_file.region()[i], Eq(0));
}
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
index 21fea8a..2198d2c 100644
--- a/icing/file/posting_list/flash-index-storage.cc
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -75,7 +75,11 @@ FlashIndexStorage::ReadHeaderMagic(const Filesystem* filesystem,
FlashIndexStorage::~FlashIndexStorage() {
if (header_block_ != nullptr) {
- FlushInMemoryFreeList();
+ libtextclassifier3::Status status = FlushInMemoryFreeList();
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << "Cannot flush in memory free list: "
+ << status.error_message();
+ }
PersistToDisk();
}
}
@@ -488,7 +492,7 @@ libtextclassifier3::Status FlashIndexStorage::FreePostingList(
ICING_ASSIGN_OR_RETURN(IndexBlock block,
GetIndexBlock(holder.id.block_index()));
if (block.posting_list_bytes() == max_posting_list_bytes()) {
- block.SetNextBlockIndex(kInvalidBlockIndex);
+ ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(kInvalidBlockIndex));
}
uint32_t posting_list_bytes = block.posting_list_bytes();
diff --git a/icing/file/posting_list/flash-index-storage_test.cc b/icing/file/posting_list/flash-index-storage_test.cc
index 3e2d239..ef60037 100644
--- a/icing/file/posting_list/flash-index-storage_test.cc
+++ b/icing/file/posting_list/flash-index-storage_test.cc
@@ -249,7 +249,8 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) {
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
// 3. Now, free the first posting list. This should add it to the free list
- flash_index_storage.FreePostingList(std::move(posting_list_holder1));
+ ICING_ASSERT_OK(
+ flash_index_storage.FreePostingList(std::move(posting_list_holder1)));
// 4. Request another posting list. This should NOT grow the index because
// the first posting list is free.
@@ -349,7 +350,8 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) {
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
// 3. Now, free the first posting list. This should add it to the free list
- flash_index_storage.FreePostingList(std::move(posting_list_holder1));
+ ICING_ASSERT_OK(
+ flash_index_storage.FreePostingList(std::move(posting_list_holder1)));
// 4. Request another posting list. This should NOT grow the index because
// the first posting list is free.
@@ -452,7 +454,8 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) {
// 3. Now, free the first posting list. This should add it to the free
// list
- flash_index_storage.FreePostingList(std::move(posting_list_holder1));
+ ICING_ASSERT_OK(
+ flash_index_storage.FreePostingList(std::move(posting_list_holder1)));
}
EXPECT_THAT(flash_index_storage.GetDiskUsage(),
diff --git a/icing/file/posting_list/index-block_test.cc b/icing/file/posting_list/index-block_test.cc
index fcc134a..ebc9ba4 100644
--- a/icing/file/posting_list/index-block_test.cc
+++ b/icing/file/posting_list/index-block_test.cc
@@ -292,7 +292,7 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Now free the first posting list. Then, reallocate it and fill it with a
// different set of hits.
- block.FreePostingList(alloc_info_1.posting_list_index);
+ ICING_ASSERT_OK(block.FreePostingList(alloc_info_1.posting_list_index));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
std::vector<Hit> hits_in_posting_list3{
diff --git a/icing/file/posting_list/posting-list-accessor.cc b/icing/file/posting_list/posting-list-accessor.cc
index 67d7a21..a7cdb17 100644
--- a/icing/file/posting_list/posting-list-accessor.cc
+++ b/icing/file/posting_list/posting-list-accessor.cc
@@ -16,7 +16,10 @@
#include <cstdint>
#include <memory>
+#include <utility>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
@@ -40,13 +43,15 @@ libtextclassifier3::Status PostingListAccessor::FlushPreexistingPostingList() {
// and free this posting list.
//
// Move will always succeed since in_memory_posting_list_ is max_pl_bytes.
- GetSerializer()->MoveFrom(/*dst=*/&in_memory_posting_list_,
- /*src=*/&preexisting_posting_list_->posting_list);
+ ICING_RETURN_IF_ERROR(GetSerializer()->MoveFrom(
+ /*dst=*/&in_memory_posting_list_,
+ /*src=*/&preexisting_posting_list_->posting_list));
// Now that all the contents of this posting list have been copied, there's
// no more use for it. Make it available to be used for another posting
// list.
- storage_->FreePostingList(std::move(*preexisting_posting_list_));
+ ICING_RETURN_IF_ERROR(
+ storage_->FreePostingList(std::move(*preexisting_posting_list_)));
}
preexisting_posting_list_.reset();
return libtextclassifier3::Status::OK;
diff --git a/icing/file/posting_list/posting-list-identifier.h b/icing/file/posting_list/posting-list-identifier.h
index 78821e8..8a0229b 100644
--- a/icing/file/posting_list/posting-list-identifier.h
+++ b/icing/file/posting_list/posting-list-identifier.h
@@ -59,6 +59,8 @@ class PostingListIdentifier {
public:
static PostingListIdentifier kInvalid;
+ explicit PostingListIdentifier() { *this = kInvalid; }
+
// 1. block_index - the index of this block within the FlashIndexStorage file
// 2. posting_list_index - the index of this posting list within the block
// 3. posting_list_index_bits - the number of bits needed to encode the
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc
index 7684262..dd233e0 100644
--- a/icing/file/version-util.cc
+++ b/icing/file/version-util.cc
@@ -131,6 +131,10 @@ bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
// version 1 -> version 2 upgrade, no need to rebuild
break;
}
+ case 2: {
+ // version 2 -> version 3 upgrade, no need to rebuild
+ break;
+ }
default:
// This should not happen. Rebuild anyway if unsure.
should_rebuild |= true;
diff --git a/icing/file/version-util.h b/icing/file/version-util.h
index 30c457d..b2d51df 100644
--- a/icing/file/version-util.h
+++ b/icing/file/version-util.h
@@ -27,17 +27,18 @@ namespace lib {
namespace version_util {
-// - Version 0: Android T. Can be identified only by flash index magic.
-// - Version 1: Android U release 2023-06.
-// - Version 2: Android U 1st mainline release 2023-09. Schema is compatible
-// with version 1.
-// TODO(b/288969109): bump kVersion to 2 before finalizing the 1st Android U
-// mainline release.
+// - Version 0: Android T base. Can be identified only by flash index magic.
+// - Version 1: Android U base and M-2023-08.
+// - Version 2: M-2023-09, M-2023-11, M-2024-01. Schema is compatible with v1.
+// (There were no M-2023-10, M-2023-12).
+// - Version 3: M-2024-02. Schema is compatible with v1 and v2.
+//
// LINT.IfChange(kVersion)
-inline static constexpr int32_t kVersion = 2;
+inline static constexpr int32_t kVersion = 3;
// LINT.ThenChange(//depot/google3/icing/schema/schema-store.cc:min_overlay_version_compatibility)
inline static constexpr int32_t kVersionOne = 1;
inline static constexpr int32_t kVersionTwo = 2;
+inline static constexpr int32_t kVersionThree = 3;
inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae;
diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc
index e94c351..9dedb1d 100644
--- a/icing/file/version-util_test.cc
+++ b/icing/file/version-util_test.cc
@@ -458,13 +458,23 @@ TEST(VersionUtilTest, ShouldRebuildDerivedFilesCompatible) {
IsFalse());
}
-TEST(VersionUtilTest, ShouldRebuildDerivedFilesUpgrade) {
+TEST(VersionUtilTest, Upgrade) {
// Unlike other state changes, upgrade depends on the actual "encoded path".
// kVersionOne -> kVersionTwo
EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
/*curr_version=*/kVersionTwo),
IsFalse());
+
+ // kVersionTwo -> kVersionThree
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionTwo, kVersionTwo),
+ /*curr_version=*/kVersionThree),
+ IsFalse());
+
+ // kVersionOne -> kVersionThree.
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
+ /*curr_version=*/kVersionThree),
+ IsFalse());
}
} // namespace
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 6680dae..72be4e9 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -40,8 +40,10 @@
#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -87,6 +89,7 @@
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
@@ -227,6 +230,29 @@ libtextclassifier3::Status ValidateSuggestionSpec(
return libtextclassifier3::Status::OK;
}
+bool IsV2QualifiedIdJoinIndexEnabled(const IcingSearchEngineOptions& options) {
+ return options.use_new_qualified_id_join_index() &&
+ options.document_store_namespace_id_fingerprint();
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+CreateQualifiedIdJoinIndex(const Filesystem& filesystem,
+ std::string qualified_id_join_index_dir,
+ const IcingSearchEngineOptions& options) {
+ if (IsV2QualifiedIdJoinIndexEnabled(options)) {
+ // V2
+ return QualifiedIdJoinIndexImplV2::Create(
+ filesystem, std::move(qualified_id_join_index_dir),
+ options.pre_mapping_fbv());
+ } else {
+ // V1
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ return QualifiedIdJoinIndexImplV1::Create(
+ filesystem, std::move(qualified_id_join_index_dir),
+ options.pre_mapping_fbv(), options.use_persistent_hash_map());
+ }
+}
+
// Version file is a single file under base_dir containing version info of the
// existing data.
std::string MakeVersionFilePath(const std::string& base_dir) {
@@ -639,24 +665,33 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
"Could not delete directories: ", index_dir, ", ", integer_index_dir,
", ", qualified_id_join_index_dir, " and ", doc_store_dir));
}
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(
- /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
- index_init_status = InitializeIndex(initialize_stats);
+ ICING_ASSIGN_OR_RETURN(
+ bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
} else if (filesystem_->FileExists(marker_filepath.c_str())) {
// If the marker file is still around then something wonky happened when we
// last tried to set the schema.
+ //
+ // Since we're going to rebuild all indices in this case, the return value
+ // of InitializeDocumentStore (document_store_derived_files_regenerated) is
+ // unused.
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/true, initialize_stats));
// We're going to need to build the index from scratch. So just delete its
// directory now.
// Discard index directory and instantiate a new one.
- Index::Options index_options(index_dir, options_.index_merge_size(),
- options_.lite_index_sort_at_indexing(),
- options_.lite_index_sort_size());
+ Index::Options index_options(
+ index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
+ options_.build_property_existence_metadata_hits());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
return absl_ports::InternalError(
@@ -684,9 +719,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
*filesystem_, qualified_id_join_index_dir));
ICING_ASSIGN_OR_RETURN(
qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(
- *filesystem_, std::move(qualified_id_join_index_dir),
- options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
@@ -711,9 +745,12 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
initialize_stats->set_qualified_id_join_index_restoration_cause(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
} else if (version_state_change != version_util::StateChange::kCompatible) {
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(
- /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
- index_init_status = InitializeIndex(initialize_stats);
+ ICING_ASSIGN_OR_RETURN(bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
@@ -729,9 +766,13 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
initialize_stats->set_qualified_id_join_index_restoration_cause(
InitializeStatsProto::VERSION_CHANGED);
} else {
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(
- /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
- index_init_status = InitializeIndex(initialize_stats);
+ ICING_ASSIGN_OR_RETURN(
+ bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
@@ -765,7 +806,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
+libtextclassifier3::StatusOr<bool> IcingSearchEngine::InitializeDocumentStore(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
@@ -787,10 +828,11 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
options_.compression_level(), initialize_stats));
document_store_ = std::move(create_result.document_store);
- return libtextclassifier3::Status::OK;
+ return create_result.derived_files_regenerated;
}
libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
+ bool document_store_derived_files_regenerated,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
@@ -800,9 +842,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
return absl_ports::InternalError(
absl_ports::StrCat("Could not create directory: ", index_dir));
}
- Index::Options index_options(index_dir, options_.index_merge_size(),
- options_.lite_index_sort_at_indexing(),
- options_.lite_index_sort_size());
+ Index::Options index_options(
+ index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
+ options_.build_property_existence_metadata_hits());
// Term index
InitializeStatsProto::RecoveryCause index_recovery_cause;
@@ -862,29 +905,44 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
std::string qualified_id_join_index_dir =
MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
- auto qualified_id_join_index_or = QualifiedIdJoinIndex::Create(
- *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(),
- options_.use_persistent_hash_map());
- if (!qualified_id_join_index_or.ok()) {
+ if (document_store_derived_files_regenerated &&
+ IsV2QualifiedIdJoinIndexEnabled(options_)) {
+ // V2 qualified id join index depends on document store derived files, so we
+ // have to rebuild it from scratch if
+ // document_store_derived_files_regenerated is true.
ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
*filesystem_, qualified_id_join_index_dir));
- qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
-
- // Try recreating it from scratch and rebuild everything.
ICING_ASSIGN_OR_RETURN(
qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(
- *filesystem_, std::move(qualified_id_join_index_dir),
- options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
- } else {
- // Qualified id join index was created fine.
- qualified_id_join_index_ =
- std::move(qualified_id_join_index_or).ValueOrDie();
- // If a recover does have to happen, then it must be because the index is
- // out of sync with the document store.
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
+
qualified_id_join_index_recovery_cause =
- InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ InitializeStatsProto::DEPENDENCIES_CHANGED;
+ } else {
+ auto qualified_id_join_index_or = CreateQualifiedIdJoinIndex(
+ *filesystem_, qualified_id_join_index_dir, options_);
+ if (!qualified_id_join_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+
+ qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+ // Try recreating it from scratch and rebuild everything.
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
+ } else {
+ // Qualified id join index was created fine.
+ qualified_id_join_index_ =
+ std::move(qualified_id_join_index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ qualified_id_join_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
}
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
@@ -1556,33 +1614,41 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// TODO(b/143646633): figure out if we need to optimize index and doc store
// at the same time.
std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
- libtextclassifier3::StatusOr<std::vector<DocumentId>>
- document_id_old_to_new_or = OptimizeDocumentStore(optimize_stats);
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ optimize_result_or = OptimizeDocumentStore(optimize_stats);
optimize_stats->set_document_store_optimize_latency_ms(
optimize_doc_store_timer->GetElapsedMilliseconds());
- if (!document_id_old_to_new_or.ok() &&
- !absl_ports::IsDataLoss(document_id_old_to_new_or.status())) {
+ if (!optimize_result_or.ok() &&
+ !absl_ports::IsDataLoss(optimize_result_or.status())) {
// The status now is either ABORTED_ERROR or INTERNAL_ERROR.
// If ABORTED_ERROR, Icing should still be working.
// If INTERNAL_ERROR, we're having IO errors or other errors that we can't
// recover from.
- TransformStatus(document_id_old_to_new_or.status(), result_status);
+ TransformStatus(optimize_result_or.status(), result_status);
return result_proto;
}
// The status is either OK or DATA_LOSS. The optimized document store is
// guaranteed to work, so we update index according to the new document store.
std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
+ auto doc_store_optimize_result_status = optimize_result_or.status();
bool should_rebuild_index =
- !document_id_old_to_new_or.ok() ||
+ !optimize_result_or.ok() ||
+ optimize_result_or.ValueOrDie().should_rebuild_index ||
ShouldRebuildIndex(*optimize_stats,
options_.optimize_rebuild_index_threshold());
if (!should_rebuild_index) {
+ // At this point should_rebuild_index is false, so it means
+ // optimize_result_or.ok() is true and therefore it is safe to call
+ // ValueOrDie.
+ DocumentStore::OptimizeResult optimize_result =
+ std::move(optimize_result_or).ValueOrDie();
+
optimize_stats->set_index_restoration_mode(
OptimizeStatsProto::INDEX_TRANSLATION);
libtextclassifier3::Status index_optimize_status =
- index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
+ index_->Optimize(optimize_result.document_id_old_to_new,
document_store_->last_added_document_id());
if (!index_optimize_status.ok()) {
ICING_LOG(WARNING) << "Failed to optimize index. Error: "
@@ -1591,7 +1657,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
}
libtextclassifier3::Status integer_index_optimize_status =
- integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
+ integer_index_->Optimize(optimize_result.document_id_old_to_new,
document_store_->last_added_document_id());
if (!integer_index_optimize_status.ok()) {
ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
@@ -1601,7 +1667,8 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
libtextclassifier3::Status qualified_id_join_index_optimize_status =
qualified_id_join_index_->Optimize(
- document_id_old_to_new_or.ValueOrDie(),
+ optimize_result.document_id_old_to_new,
+ optimize_result.namespace_id_old_to_new,
document_store_->last_added_document_id());
if (!qualified_id_join_index_optimize_status.ok()) {
ICING_LOG(WARNING)
@@ -1613,6 +1680,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
// valid document store, but it might be the old one or the new one. So throw
// out the index data and rebuild from scratch.
+ // Also rebuild index if DocumentStore::OptimizeInto hints to do so.
// Likewise, if Index::Optimize failed, then attempt to recover the index by
// rebuilding from scratch.
// If ShouldRebuildIndex() returns true, we will also rebuild the index for
@@ -1671,7 +1739,11 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
// Update the status for this run and write it.
auto optimize_status = std::make_unique<OptimizeStatusProto>();
optimize_status->set_last_successful_optimize_run_time_ms(current_time);
- optimize_status_file.Write(std::move(optimize_status));
+ auto write_status = optimize_status_file.Write(std::move(optimize_status));
+ if (!write_status.ok()) {
+ ICING_LOG(ERROR) << "Failed to write optimize status:\n"
+ << write_status.error_message();
+ }
// Flushes data to disk after doing optimization
status = InternalPersistToDisk(PersistType::FULL);
@@ -1684,7 +1756,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
optimize_stats->set_storage_size_after(
Filesystem::SanitizeFileSize(after_size));
- TransformStatus(document_id_old_to_new_or.status(), result_status);
+ TransformStatus(doc_store_optimize_result_status, result_status);
return result_proto;
}
@@ -1891,7 +1963,17 @@ SearchResultProto IcingSearchEngine::InternalSearch(
StatusProto* result_status = result_proto.mutable_status();
QueryStatsProto* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_is_first_page(true);
+ query_stats->set_requested_page_size(result_spec.num_per_page());
+
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ query_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ query_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
query_stats->set_query_length(search_spec.query().length());
+ query_stats->set_ranking_strategy(scoring_spec.rank_by());
+
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
@@ -1910,27 +1992,22 @@ SearchResultProto IcingSearchEngine::InternalSearch(
return result_proto;
}
- query_stats->set_num_namespaces_filtered(
- search_spec.namespace_filters_size());
- query_stats->set_num_schema_types_filtered(
- search_spec.schema_type_filters_size());
- query_stats->set_ranking_strategy(scoring_spec.rank_by());
- query_stats->set_is_first_page(true);
- query_stats->set_requested_page_size(result_spec.num_per_page());
-
const JoinSpecProto& join_spec = search_spec.join_spec();
std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info;
int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
if (!join_spec.parent_property_expression().empty() &&
!join_spec.child_property_expression().empty()) {
+ query_stats->set_is_join_query(true);
+ QueryStatsProto::SearchStats* child_search_stats =
+ query_stats->mutable_child_search_stats();
+
// Process child query
QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
join_spec.nested_spec().search_spec(),
join_spec.nested_spec().scoring_spec(),
join_spec.nested_spec().result_spec(),
- /*join_children_fetcher=*/nullptr, current_time_ms);
- // TOOD(b/256022027): set different kinds of latency for 2nd query.
+ /*join_children_fetcher=*/nullptr, current_time_ms, child_search_stats);
if (!nested_query_scoring_results.status.ok()) {
TransformStatus(nested_query_scoring_results.status, result_status);
return result_proto;
@@ -1961,24 +2038,24 @@ SearchResultProto IcingSearchEngine::InternalSearch(
}
// Process parent query
- QueryScoringResults query_scoring_results =
- ProcessQueryAndScore(search_spec, scoring_spec, result_spec,
- join_children_fetcher.get(), current_time_ms);
- int term_count = 0;
- for (const auto& section_and_terms : query_scoring_results.query_terms) {
- term_count += section_and_terms.second.size();
- }
- query_stats->set_num_terms(term_count);
+ QueryStatsProto::SearchStats* parent_search_stats =
+ query_stats->mutable_parent_search_stats();
+ QueryScoringResults query_scoring_results = ProcessQueryAndScore(
+ search_spec, scoring_spec, result_spec, join_children_fetcher.get(),
+ current_time_ms, parent_search_stats);
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ query_stats->set_num_terms(parent_search_stats->num_terms());
query_stats->set_parse_query_latency_ms(
- query_scoring_results.parse_query_latency_ms);
- query_stats->set_scoring_latency_ms(query_scoring_results.scoring_latency_ms);
+ parent_search_stats->parse_query_latency_ms());
+ query_stats->set_scoring_latency_ms(
+ parent_search_stats->scoring_latency_ms());
+ query_stats->set_num_documents_scored(
+ parent_search_stats->num_documents_scored());
if (!query_scoring_results.status.ok()) {
TransformStatus(query_scoring_results.status, result_status);
return result_proto;
}
- query_stats->set_num_documents_scored(
- query_scoring_results.scored_document_hits.size());
// Returns early for empty result
if (query_scoring_results.scored_document_hits.empty()) {
result_status->set_code(StatusProto::OK);
@@ -2092,7 +2169,15 @@ SearchResultProto IcingSearchEngine::InternalSearch(
IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
const ResultSpecProto& result_spec,
- const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) {
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats) {
+ search_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ search_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+ search_stats->set_query_length(search_spec.query().length());
+ search_stats->set_ranking_strategy(scoring_spec.rank_by());
+
std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
@@ -2100,11 +2185,11 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
index_.get(), integer_index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(), schema_store_.get());
if (!query_processor_or.ok()) {
- return QueryScoringResults(
- std::move(query_processor_or).status(), /*query_terms_in=*/{},
- /*scored_document_hits_in=*/{},
- /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
- /*scoring_latency_ms_in=*/0);
+ search_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return QueryScoringResults(std::move(query_processor_or).status(),
+ /*query_terms_in=*/{},
+ /*scored_document_hits_in=*/{});
}
std::unique_ptr<QueryProcessor> query_processor =
std::move(query_processor_or).ValueOrDie();
@@ -2117,15 +2202,25 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
} else {
query_results_or = ranking_strategy_or.status();
}
+ search_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
if (!query_results_or.ok()) {
- return QueryScoringResults(
- std::move(query_results_or).status(), /*query_terms_in=*/{},
- /*scored_document_hits_in=*/{},
- /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
- /*scoring_latency_ms_in=*/0);
+ return QueryScoringResults(std::move(query_results_or).status(),
+ /*query_terms_in=*/{},
+ /*scored_document_hits_in=*/{});
}
QueryResults query_results = std::move(query_results_or).ValueOrDie();
- int64_t parse_query_latency_ms = component_timer->GetElapsedMilliseconds();
+
+ // Set SearchStats related to QueryResults.
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ search_stats->set_num_terms(term_count);
+
+ if (query_results.features_in_use.count(kNumericSearchFeature)) {
+ search_stats->set_is_numeric_query(true);
+ }
component_timer = clock_->GetNewTimer();
// Scores but does not rank the results.
@@ -2136,22 +2231,20 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
if (!scoring_processor_or.ok()) {
return QueryScoringResults(std::move(scoring_processor_or).status(),
std::move(query_results.query_terms),
- /*scored_document_hits_in=*/{},
- parse_query_latency_ms,
- /*scoring_latency_ms_in=*/0);
+ /*scored_document_hits_in=*/{});
}
std::unique_ptr<ScoringProcessor> scoring_processor =
std::move(scoring_processor_or).ValueOrDie();
std::vector<ScoredDocumentHit> scored_document_hits =
- scoring_processor->Score(std::move(query_results.root_iterator),
- result_spec.num_to_score(),
- &query_results.query_term_iterators);
- int64_t scoring_latency_ms = component_timer->GetElapsedMilliseconds();
+ scoring_processor->Score(
+ std::move(query_results.root_iterator), result_spec.num_to_score(),
+ &query_results.query_term_iterators, search_stats);
+ search_stats->set_scoring_latency_ms(
+ component_timer->GetElapsedMilliseconds());
return QueryScoringResults(libtextclassifier3::Status::OK,
std::move(query_results.query_terms),
- std::move(scored_document_hits),
- parse_query_latency_ms, scoring_latency_ms);
+ std::move(scored_document_hits));
}
SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
@@ -2242,7 +2335,7 @@ void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) {
result_state_manager_->InvalidateResultState(next_page_token);
}
-libtextclassifier3::StatusOr<std::vector<DocumentId>>
+libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Gets the current directory path and an empty tmp directory path for
// document store optimization.
@@ -2259,16 +2352,16 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
}
// Copies valid document data to tmp directory
- libtextclassifier3::StatusOr<std::vector<DocumentId>>
- document_id_old_to_new_or = document_store_->OptimizeInto(
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ optimize_result_or = document_store_->OptimizeInto(
temporary_document_dir, language_segmenter_.get(), optimize_stats);
// Handles error if any
- if (!document_id_old_to_new_or.ok()) {
+ if (!optimize_result_or.ok()) {
filesystem_->DeleteDirectoryRecursively(temporary_document_dir.c_str());
return absl_ports::Annotate(
absl_ports::AbortedError("Failed to optimize document store"),
- document_id_old_to_new_or.status().error_message());
+ optimize_result_or.status().error_message());
}
// result_state_manager_ depends on document_store_. So we need to reset it at
@@ -2337,7 +2430,9 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
"Document store has been optimized, but a valid document store "
"instance can't be created");
}
- document_store_ = std::move(create_result_or.ValueOrDie().document_store);
+ DocumentStore::CreateResult create_result =
+ std::move(create_result_or).ValueOrDie();
+ document_store_ = std::move(create_result.document_store);
result_state_manager_ = std::make_unique<ResultStateManager>(
performance_configuration_.max_num_total_hits, *document_store_);
@@ -2347,7 +2442,19 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
ICING_LOG(ERROR) << "Document store has been optimized, but it failed to "
"delete temporary file directory";
}
- return document_id_old_to_new_or;
+
+ // Since we created new (optimized) document store with correct PersistToDisk
+ // call, we shouldn't have data loss or regenerate derived files. Therefore,
+ // if we really encounter any of these situations, then return DataLossError
+ // to let the caller rebuild index.
+ if (create_result.data_loss != DataLoss::NONE ||
+ create_result.derived_files_regenerated) {
+ return absl_ports::DataLossError(
+ "Unexpected data loss or derived files regenerated for new document "
+ "store");
+ }
+
+ return optimize_result_or;
}
IcingSearchEngine::IndexRestorationResult
@@ -2479,11 +2586,12 @@ IcingSearchEngine::CreateDataIndexingHandlers() {
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
// Term index handler
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(
- clock_.get(), normalizer_.get(), index_.get()));
- handlers.push_back(std::move(string_section_indexing_handler));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ clock_.get(), normalizer_.get(), index_.get(),
+ options_.build_property_existence_metadata_hits()));
+ handlers.push_back(std::move(term_indexing_handler));
// Integer index handler
ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler>
@@ -2493,10 +2601,11 @@ IcingSearchEngine::CreateDataIndexingHandlers() {
handlers.push_back(std::move(integer_section_indexing_handler));
// Qualified id join index handler
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
- qualified_id_join_indexing_handler,
- QualifiedIdJoinIndexingHandler::Create(
- clock_.get(), qualified_id_join_index_.get()));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ clock_.get(), document_store_.get(), qualified_id_join_index_.get()));
handlers.push_back(std::move(qualified_id_join_indexing_handler));
return handlers;
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index d9d5ff6..d316350 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -546,10 +547,12 @@ class IcingSearchEngine {
// force_recovery_and_revalidate_documents.
//
// Returns:
- // OK on success
+ // On success, a boolean flag indicating whether derived files of the
+ // document store have been regenerated or not. If true, any other
+ // components depending on them should also be rebuilt.
// FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeDocumentStore(
+ libtextclassifier3::StatusOr<bool> InitializeDocumentStore(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -557,6 +560,9 @@ class IcingSearchEngine {
// Do any initialization/recovery necessary to create term index, integer
// index, and qualified id join index instances.
//
+ // If document_store_derived_files_regenerated is true, then we have to
+ // rebuild qualified id join index since NamespaceIds were reassigned.
+ //
// Returns:
// OK on success
// FAILED_PRECONDITION if initialize_stats is null
@@ -564,6 +570,7 @@ class IcingSearchEngine {
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on I/O error
libtextclassifier3::Status InitializeIndex(
+ bool document_store_derived_files_regenerated,
InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -603,24 +610,20 @@ class IcingSearchEngine {
libtextclassifier3::Status status;
SectionRestrictQueryTermsMap query_terms;
std::vector<ScoredDocumentHit> scored_document_hits;
- int64_t parse_query_latency_ms;
- int64_t scoring_latency_ms;
explicit QueryScoringResults(
libtextclassifier3::Status status_in,
SectionRestrictQueryTermsMap&& query_terms_in,
- std::vector<ScoredDocumentHit>&& scored_document_hits_in,
- int64_t parse_query_latency_ms_in, int64_t scoring_latency_ms_in)
+ std::vector<ScoredDocumentHit>&& scored_document_hits_in)
: status(std::move(status_in)),
query_terms(std::move(query_terms_in)),
- scored_document_hits(std::move(scored_document_hits_in)),
- parse_query_latency_ms(parse_query_latency_ms_in),
- scoring_latency_ms(scoring_latency_ms_in) {}
+ scored_document_hits(std::move(scored_document_hits_in)) {}
};
QueryScoringResults ProcessQueryAndScore(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
const ResultSpecProto& result_spec,
- const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms)
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats)
ICING_SHARED_LOCKS_REQUIRED(mutex_);
// Many of the internal components rely on other components' derived data.
@@ -664,17 +667,18 @@ class IcingSearchEngine {
// would need call Initialize() to reinitialize everything into a valid state.
//
// Returns:
- // On success, a vector that maps from old document id to new document id. A
- // value of kInvalidDocumentId indicates that the old document id has been
- // deleted.
+ // On success, OptimizeResult which contains a vector mapping from old
+ // document id to new document id and another vector mapping from old
+ // namespace id to new namespace id. A value of kInvalidDocumentId indicates
+ // that the old document id has been deleted.
// ABORTED_ERROR if any error happens before the actual optimization, the
// original document store should be still available
// DATA_LOSS_ERROR on errors that could potentially cause data loss,
// document store is still available
// INTERNAL_ERROR on any IO errors or other errors that we can't recover
// from
- libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeDocumentStore(
- OptimizeStatsProto* optimize_stats)
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ OptimizeDocumentStore(OptimizeStatsProto* optimize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to restore missing document data in index_, integer_index_,
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index 354d11c..18c6bb9 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -1117,6 +1117,8 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) {
IcingSearchEngineOptions options;
options.set_base_dir(test_dir);
options.set_index_merge_size(kIcingFullIndexSize);
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(true);
std::unique_ptr<IcingSearchEngine> icing =
std::make_unique<IcingSearchEngine>(options);
@@ -1140,7 +1142,7 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) {
}
// Create Email documents (child)
- static constexpr int kNumEmailDocuments = 10000;
+ static constexpr int kNumEmailDocuments = 1000;
std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1);
std::default_random_engine e(/*seed=*/12345);
for (int i = 0; i < kNumEmailDocuments; ++i) {
@@ -1200,17 +1202,8 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) {
std::reduce(results.results().begin(), results.results().end(), 0,
child_count_reduce_func);
- // Get all pages.
- while (results.next_page_token() != kInvalidNextPageToken) {
- results = icing->GetNextPage(results.next_page_token());
- total_parent_count += results.results_size();
- total_child_count +=
- std::reduce(results.results().begin(), results.results().end(), 0,
- child_count_reduce_func);
- }
-
- ASSERT_THAT(total_parent_count, Eq(kNumPersonDocuments));
- ASSERT_THAT(total_child_count, Eq(kNumEmailDocuments));
+ ASSERT_THAT(total_parent_count, Eq(kNumPerPage));
+ ASSERT_THAT(total_child_count, ::testing::Ge(0));
}
}
BENCHMARK(BM_JoinQueryQualifiedId);
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index b4853b4..122e4af 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -12,28 +12,39 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
+#include <string_view>
+#include <tuple>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/document-builder.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/file/version-util.h"
#include "icing/icing-search-engine.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/jni/jni-cache.h"
-#include "icing/join/doc-join-info.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -59,8 +70,12 @@
#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -71,6 +86,7 @@
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "icing/util/tokenized-document.h"
#include "unicode/uloc.h"
@@ -211,6 +227,8 @@ std::string GetHeaderFilename() {
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
return icing_options;
}
@@ -1040,12 +1058,14 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
{
// Initializes folder and schema, index one document
- TestIcingSearchEngine icing(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
- GetTestJniCache());
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
@@ -1064,7 +1084,9 @@ TEST_F(IcingSearchEngineInitializationTest,
DocumentStore::Create(
filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*namespace_id_fingerprint=*/
+ icing_options.document_store_namespace_id_fingerprint(),
+ /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
@@ -1102,8 +1124,7 @@ TEST_F(IcingSearchEngineInitializationTest,
HasSubstr("/qualified_id_join_index_dir/")))
.Times(0);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(), GetTestJniCache());
InitializeResultProto initialize_result = icing.Initialize();
@@ -1201,6 +1222,222 @@ TEST_F(IcingSearchEngineInitializationTest,
expected_join_search_result_proto));
}
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptedDocumentStore) {
+ // Test the following scenario: some document store derived files are
+ // corrupted. IcingSearchEngine should be able to recover the document store,
+ // and since NamespaceIds were reassigned, we should rebuild qualified id join
+ // index as well. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Should discard the entire qualified id join index directory and start
+ // it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ // - Still, we need to replay and reindex documents (for qualified id join
+ // index).
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto personDummy =
+ DocumentBuilder()
+ .SetKey("namespace2", "personDummy")
+ .SetSchema("Person")
+ .AddStringProperty("name", "personDummy")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace2", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace2#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // "namespace2" (in personDummy) will be assigned NamespaceId = 0.
+ EXPECT_THAT(icing.Put(personDummy).status(), ProtoIsOk());
+ // "namespace1" (in person1) will be assigned NamespaceId = 1.
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+ // Now delete personDummy.
+ EXPECT_THAT(
+ icing.Delete(personDummy.namespace_(), personDummy.uri()).status(),
+ ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Manually corrupt one of the derived files of DocumentStore without
+ // updating checksum in DocumentStore header.
+ std::string score_cache_filename = GetDocumentDir() + "/score_cache";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>>
+ score_cache,
+ FileBackedVector<DocumentAssociatedScoreData>::Create(
+ *filesystem(), std::move(score_cache_filename),
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(const DocumentAssociatedScoreData* score_data,
+ score_cache->Get(/*idx=*/0));
+ ICING_ASSERT_OK(score_cache->Set(
+ /*idx=*/0,
+ DocumentAssociatedScoreData(score_data->corpus_id(),
+ score_data->document_score() + 1,
+ score_data->creation_timestamp_ms(),
+ score_data->length_in_tokens())));
+ ICING_ASSERT_OK(score_cache->PersistToDisk());
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ // DocumentStore should be recovered. When reassigning NamespaceId, the order
+ // will be the document traversal order: [person1, person2, message].
+ // Therefore, "namespace1" will have id = 0 and "namespace2" will have id = 1.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ // Term, integer index should be unaffected.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Qualified id join index should be rebuilt.
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field. The
+ // message document should be joined to person2 correctly.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+
+ *expected_join_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
// Test the following scenario: term index is corrupted (e.g. checksum doesn't
// match). IcingSearchEngine should be able to recover term index. Several
@@ -3696,16 +3933,18 @@ TEST_F(IcingSearchEngineInitializationTest,
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
// Add data for document 0.
ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
kInvalidDocumentId);
qualified_id_join_index->set_last_added_document_id(0);
ICING_ASSERT_OK(qualified_id_join_index->Put(
- DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0),
- /*ref_qualified_id_str=*/"namespace#person"));
+ /*schema_type_id=*/0, /*joinable_property_id=*/0, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*target_str=*/"uri")}));
}
// 3. Create the index again. This should trigger index restoration.
@@ -3766,12 +4005,14 @@ TEST_F(IcingSearchEngineInitializationTest,
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
- EXPECT_THAT(qualified_id_join_index->Get(
- DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto iterator, qualified_id_join_index->GetIterator(
+ /*schema_type_id=*/0, /*joinable_property_id=*/0));
+ EXPECT_THAT(iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
}
@@ -3855,7 +4096,6 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
- DocJoinInfo additional_data_key;
// 2. Manually add some data into qualified id join index and increment
// last_added_document_id. This will cause mismatched document id with
// document store.
@@ -3867,20 +4107,20 @@ TEST_F(IcingSearchEngineInitializationTest,
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
// Add data for document 4.
DocumentId original_last_added_doc_id =
qualified_id_join_index->last_added_document_id();
qualified_id_join_index->set_last_added_document_id(
original_last_added_doc_id + 1);
- additional_data_key =
- DocJoinInfo(/*document_id=*/original_last_added_doc_id + 1,
- /*joinable_property_id=*/0);
ICING_ASSERT_OK(qualified_id_join_index->Put(
- additional_data_key,
- /*ref_qualified_id_str=*/"namespace#person"));
+ /*schema_type_id=*/1, /*joinable_property_id=*/0,
+ /*document_id=*/original_last_added_doc_id + 1,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*target_str=*/"person")}));
}
// 3. Create the index again. This should trigger index restoration.
@@ -4288,9 +4528,12 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result_proto.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::PARTIAL_LOSS));
- // Since document store rewinds to previous checkpoint, last stored doc id
- // will be consistent with last added document ids in term/integer indices,
- // so there will be no index restoration.
+ // Document store rewinds to previous checkpoint and all derived files were
+ // regenerated.
+ // - Last stored doc id will be consistent with last added document ids in
+ // term/integer indices, so there will be no index restoration.
+ // - Qualified id join index depends on document store derived files and
+ // since they were regenerated, we should rebuild qualified id join index.
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
@@ -4299,10 +4542,10 @@ TEST_F(IcingSearchEngineInitializationTest,
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.qualified_id_join_index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
- Eq(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
Eq(InitializeStatsProto::NONE));
@@ -4954,7 +5197,7 @@ TEST_F(IcingSearchEngineInitializationTest,
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _))
.WillRepeatedly(DoDefault());
- // This fails QualifiedIdJoinIndex::Create() once.
+ // This fails QualifiedIdJoinIndexImplV2::Create() once.
EXPECT_CALL(
*mock_filesystem,
PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _,
@@ -5054,10 +5297,10 @@ TEST_F(IcingSearchEngineInitializationTest,
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.qualified_id_join_index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(0));
+ Eq(10));
EXPECT_THAT(
initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
Eq(InitializeStatsProto::NONE));
@@ -5160,6 +5403,169 @@ TEST_F(IcingSearchEngineInitializationTest,
}
}
+// TODO(b/275121148): deprecate this test after rollout join index v2.
+class IcingSearchEngineInitializationSwitchJoinIndexTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<bool> {};
+TEST_P(IcingSearchEngineInitializationSwitchJoinIndexTest, SwitchJoinIndex) {
+ bool use_join_index_v2 = GetParam();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with message 3 documents.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(use_join_index_v2);
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again changing join index version. This should trigger
+ // join index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(!use_join_index_v2);
+
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(IcingSearchEngineInitializationSwitchJoinIndexTest,
+ IcingSearchEngineInitializationSwitchJoinIndexTest,
+ testing::Values(true, false));
+
class IcingSearchEngineInitializationVersionChangeTest
: public IcingSearchEngineInitializationTest,
public ::testing::WithParamInterface<version_util::VersionInfo> {};
@@ -5219,12 +5625,14 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
{
// Initializes folder and schema, index person1 and person2
- TestIcingSearchEngine icing(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
- GetTestJniCache());
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
@@ -5251,7 +5659,9 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
DocumentStore::Create(
filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*namespace_id_fingerprint=*/
+ icing_options.document_store_namespace_id_fingerprint(),
+ /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
@@ -5276,25 +5686,26 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(
- *filesystem(), GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false));
+ QualifiedIdJoinIndexImplV2::Create(*filesystem(),
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(),
- index.get()));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(
&fake_clock, integer_index.get()));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
- qualified_id_join_indexing_handler,
- QualifiedIdJoinIndexingHandler::Create(
- &fake_clock, qualified_id_join_index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock, document_store.get(), qualified_id_join_index.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
handlers.push_back(std::move(qualified_id_join_indexing_handler));
IndexProcessor index_processor(std::move(handlers), &fake_clock);
@@ -5457,6 +5868,163 @@ INSTANTIATE_TEST_SUITE_P(
/*version_in=*/0,
/*max_version_in=*/version_util::kVersion)));
+class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<std::tuple<bool, bool>> {};
+TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ ChangePropertyExistenceHitsFlagTest) {
+ bool before_build_property_existence_metadata_hits = std::get<0>(GetParam());
+ bool after_build_property_existence_metadata_hits = std::get<1>(GetParam());
+ bool flag_changed = before_build_property_existence_metadata_hits !=
+ after_build_property_existence_metadata_hits;
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Value")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("score")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create a document with every property.
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+
+ // 1. Create an index with the 3 documents.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(
+ before_build_property_existence_metadata_hits);
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again with
+ // after_build_property_existence_metadata_hits.
+ //
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure that the term index is rebuilt if the flag is changed.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(flag_changed ? 1 : 0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(
+ after_build_property_existence_metadata_hits);
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Ensure that the term index is rebuilt if the flag is changed.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(flag_changed ? InitializeStatsProto::IO_ERROR
+ : InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Get all documents that have "body".
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ search_spec.set_query("hasProperty(\"body\")");
+ SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+
+ // Get all documents that have "timestamp".
+ search_spec.set_query("hasProperty(\"timestamp\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+
+ // Get all documents that have "score".
+ search_spec.set_query("hasProperty(\"score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ testing::Values(std::make_tuple(false, false), std::make_tuple(false, true),
+ std::make_tuple(true, false), std::make_tuple(true, true)));
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
index 3127171..61b594c 100644
--- a/icing/icing-search-engine_optimize_test.cc
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -1552,6 +1552,8 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeThresholdTest) {
expected.set_num_original_documents(3);
expected.set_num_deleted_documents(1);
expected.set_num_expired_documents(1);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
// Run Optimize
@@ -1584,6 +1586,8 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeThresholdTest) {
expected.set_num_original_documents(1);
expected.set_num_deleted_documents(0);
expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
expected.set_time_since_last_optimize_ms(10000);
expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
@@ -1606,6 +1610,8 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeThresholdTest) {
expected.set_num_original_documents(1);
expected.set_num_deleted_documents(1);
expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(1);
expected.set_time_since_last_optimize_ms(0);
// Should rebuild the index since all documents are removed.
expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
@@ -1689,6 +1695,8 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
expected.set_num_original_documents(3);
expected.set_num_deleted_documents(1);
expected.set_num_expired_documents(1);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
// Run Optimize
@@ -1723,6 +1731,8 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
expected.set_num_original_documents(1);
expected.set_num_deleted_documents(0);
expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
expected.set_time_since_last_optimize_ms(10000);
expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
@@ -1745,6 +1755,8 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
expected.set_num_original_documents(1);
expected.set_num_deleted_documents(1);
expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(1);
expected.set_time_since_last_optimize_ms(0);
expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
index 0e88c5a..49c024e 100644
--- a/icing/icing-search-engine_schema_test.cc
+++ b/icing/icing-search-engine_schema_test.cc
@@ -44,6 +44,7 @@
#include "icing/proto/usage.pb.h"
#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
+#include "icing/schema/section.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -114,6 +115,8 @@ std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
return icing_options;
}
@@ -3131,6 +3134,26 @@ TEST_F(IcingSearchEngineSchemaTest, IcingShouldWorkFor64Sections) {
EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
}
+TEST_F(IcingSearchEngineSchemaTest, IcingShouldReturnErrorForExtraSections) {
+ // Create a schema with more sections than allowed.
+ SchemaTypeConfigBuilder schema_type_config_builder =
+ SchemaTypeConfigBuilder().SetType("type");
+ for (int i = 0; i <= kMaxSectionId + 1; ++i) {
+ schema_type_config_builder.AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop" + std::to_string(i))
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ }
+ SchemaProto schema =
+ SchemaBuilder().AddType(schema_type_config_builder).Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status().message(),
+ HasSubstr("Too many properties to be indexed"));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index 451c9ce..21512c6 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -24,6 +24,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
+#include "icing/index/lite/term-id-hit-pair.h"
#include "icing/jni/jni-cache.h"
#include "icing/join/join-processor.h"
#include "icing/portable/endian.h"
@@ -45,6 +46,7 @@
#include "icing/proto/term.pb.h"
#include "icing/proto/usage.pb.h"
#include "icing/query/query-features.h"
+#include "icing/result/result-state-manager.h"
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -60,10 +62,12 @@ namespace lib {
namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::DoubleEq;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Gt;
using ::testing::IsEmpty;
+using ::testing::Lt;
using ::testing::Ne;
using ::testing::SizeIs;
@@ -119,6 +123,8 @@ constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
return icing_options;
}
@@ -393,14 +399,39 @@ TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult) {
EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
Eq(1000));
EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .parse_query_latency_ms(),
Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .scoring_latency_ms(),
Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_documents_scored(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_lite_index(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_main_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_integer_index(),
+ Eq(0));
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
@@ -444,14 +475,39 @@ TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult_readOnlyFalse) {
EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
Eq(1000));
EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .parse_query_latency_ms(),
Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .scoring_latency_ms(),
Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_documents_scored(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_lite_index(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_main_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_integer_index(),
+ Eq(0));
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
@@ -616,7 +672,6 @@ TEST_P(IcingSearchEngineSearchTest,
expected_search_result_proto));
}
-
TEST_P(IcingSearchEngineSearchTest,
SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -779,14 +834,39 @@ TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnEmpty) {
EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
Eq(1000));
EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
- Eq(0));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .scoring_latency_ms(),
Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_documents_scored(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_lite_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_main_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_integer_index(),
+ Eq(0));
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -3633,42 +3713,171 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFilters) {
// 3. Verify that only the first document is returned. Although 'hello' is
// present in document_two, it shouldn't be in the result since 'hello' is not
// in the specified property filter.
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmptySearchWithPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Verify that both documents are returned.
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmptySearchWithEmptyPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ // Add empty list for Email's property filters
+ email_property_filters->set_schema_type("Email");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Verify that both documents are returned.
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
}
TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// Add Person and Organization schema with a property 'name' in both.
- SchemaProto schema = SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Organization")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("address")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(),
- ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
// 1. Add person document
DocumentProto person_document =
@@ -3719,8 +3928,7 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) {
// 3. Verify that only the person document is returned. Although 'Meg' is
// present in organization document, it shouldn't be in the result since
// the name field is not specified in the Organization property filter.
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(person_document));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(person_document));
}
TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) {
@@ -3792,8 +4000,7 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) {
// document doesn't contain the word 'hello' in either of fields specified in
// the property filter. This confirms that the property filters for the
// wildcard entry have been applied to the Email schema as well.
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
}
TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) {
@@ -3872,8 +4079,7 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) {
// or body. This confirms that the property filters specified for Email schema
// have been applied and the ones specified for wildcard entry have been
// ignored.
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(document_two));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
}
TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) {
@@ -3945,26 +4151,22 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) {
// word 'hello' in at least 1 property. The second document being returned
// confirms that the body field was searched and the specified property
// filters were not applied to the Email schema type.
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(document_two));
- EXPECT_THAT(results.results(1).document(),
- EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
}
TEST_P(IcingSearchEngineSearchTest, SearchWithEmptyPropertyFilter) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
- ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// 1. Add two email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Message")
- .AddStringProperty("body", "Hello World!")
- .Build();
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
// 2. Issue a query with empty property filter for Message schema.
@@ -3994,17 +4196,15 @@ TEST_P(IcingSearchEngineSearchTest,
SearchWithPropertyFilterHavingInvalidProperty) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
- ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// 1. Add two email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Message")
- .AddStringProperty("body", "Hello World!")
- .Build();
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
// 2. Issue a query with property filter having invalid/unknown property for
@@ -4102,15 +4302,138 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersWithNesting) {
// document doesn't contain the word 'hello' in sender.emailAddress. The first
// document being returned confirms that the nested property
// sender.emailAddress was actually searched.
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilter_RelevanceScoreUnaffectedByExcludedSectionHits) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Hello Ryan")
+ .AddStringProperty("emailAddress", "hello@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello Hello!")
+ .AddStringProperty("body", "hello1 hello2 hello3 hello4 hello5")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "world@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello Hello!")
+ .AddStringProperty("body", "one1 two2 three3 four4 five5")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Verify that both documents are returned and have equal relevance score
+ // Note, the total number of tokens must be equal in the documents
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ scoring_spec->set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).score(), DoubleEq(results.results(1).score()));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilter_ExcludingSectionsWithHitsLowersRelevanceScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add an email document
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Hello Ryan")
+ .AddStringProperty("emailAddress", "hello@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello Hello!")
+ .AddStringProperty("body", "hello hello hello hello hello")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query without property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Hello");
+ search_spec->set_search_type(GetParam());
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Get the relevance score without property filter
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ scoring_spec->set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ double original_relevance_score = results.results(0).score();
+
+ // 4. Relevance score with property filter should be lower
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("subject");
+ results = icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).score(), Lt(original_relevance_score));
}
TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
+
+ // Set index merge size to 6 hits. This will cause document1, document2,
+ // document3's hits being merged into the main index, and document4,
+ // document5's hits will remain in the lite index.
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(sizeof(TermIdHitPair::Value) * 6);
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
std::make_unique<IcingFilesystem>(),
std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -4153,6 +4476,7 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
// Check the stats
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
QueryStatsProto exp_stats;
exp_stats.set_query_length(7);
exp_stats.set_num_terms(1);
@@ -4172,6 +4496,22 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
exp_stats.set_num_joined_results_returned_current_page(0);
+
+ QueryStatsProto::SearchStats* exp_parent_search_stats =
+ exp_stats.mutable_parent_search_stats();
+ exp_parent_search_stats->set_query_length(7);
+ exp_parent_search_stats->set_num_terms(1);
+ exp_parent_search_stats->set_num_namespaces_filtered(1);
+ exp_parent_search_stats->set_num_schema_types_filtered(1);
+ exp_parent_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_parent_search_stats->set_num_documents_scored(5);
+ exp_parent_search_stats->set_parse_query_latency_ms(5);
+ exp_parent_search_stats->set_scoring_latency_ms(5);
+ exp_parent_search_stats->set_num_fetched_hits_lite_index(2);
+ exp_parent_search_stats->set_num_fetched_hits_main_index(3);
+ exp_parent_search_stats->set_num_fetched_hits_integer_index(0);
+
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
// Second page, 2 result with 1 snippet
@@ -4212,8 +4552,14 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
+
+ // Set index merge size to 13 hits. This will cause person1, person2, email1,
+ // email2, email3's hits being merged into the main index, and person3,
+ // email4's hits will remain in the lite index.
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(sizeof(TermIdHitPair::Value) * 13);
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
std::make_unique<IcingFilesystem>(),
std::move(fake_clock), GetTestJniCache());
@@ -4233,8 +4579,7 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
.SetCardinality(CARDINALITY_OPTIONAL))
.AddProperty(PropertyConfigBuilder()
.SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
+ .SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(SchemaTypeConfigBuilder()
.SetType("Email")
@@ -4308,15 +4653,25 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.SetScore(1)
.Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(0)
+ .Build();
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
// Parent SearchSpec
SearchSpecProto search_spec;
@@ -4353,13 +4708,14 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
std::numeric_limits<int32_t>::max());
// Since we:
- // - Use MAX for aggregation scoring strategy.
+ // - Use COUNT for aggregation scoring strategy.
// - (Default) use DOCUMENT_SCORE to score child documents.
// - (Default) use DESC as the ranking order.
//
- // person1 + email1 should have the highest aggregated score (3) and be
- // returned first. person2 + email2 (aggregated score = 2) should be the
- // second, and person3 + email3 (aggregated score = 1) should be the last.
+ // person1 with [email1, email2, email4] should have the highest aggregated
+ // score (3) and be returned first. person2 with [email3] (aggregated score =
+ // 1) should be the second, and person3 with no child (aggregated score = 0)
+ // should be the last.
SearchResultProto expected_result1;
expected_result1.mutable_status()->set_code(StatusProto::OK);
SearchResultProto::ResultProto* result_proto1 =
@@ -4367,6 +4723,7 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
*result_proto1->mutable_document() = person1;
*result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
*result_proto1->mutable_joined_results()->Add()->mutable_document() = email2;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email4;
SearchResultProto expected_result2;
expected_result2.mutable_status()->set_code(StatusProto::OK);
@@ -4390,6 +4747,7 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
EqualsSearchResultIgnoreStatsAndScores(expected_result1));
// Check the stats
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
QueryStatsProto exp_stats;
exp_stats.set_query_length(15);
exp_stats.set_num_terms(1);
@@ -4408,8 +4766,40 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
exp_stats.set_ranking_latency_ms(5);
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
- exp_stats.set_num_joined_results_returned_current_page(2);
+ exp_stats.set_num_joined_results_returned_current_page(3);
exp_stats.set_join_latency_ms(5);
+ exp_stats.set_is_join_query(true);
+
+ QueryStatsProto::SearchStats* exp_parent_search_stats =
+ exp_stats.mutable_parent_search_stats();
+ exp_parent_search_stats->set_query_length(15);
+ exp_parent_search_stats->set_num_terms(1);
+ exp_parent_search_stats->set_num_namespaces_filtered(0);
+ exp_parent_search_stats->set_num_schema_types_filtered(0);
+ exp_parent_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ exp_parent_search_stats->set_num_documents_scored(3);
+ exp_parent_search_stats->set_parse_query_latency_ms(5);
+ exp_parent_search_stats->set_scoring_latency_ms(5);
+ exp_parent_search_stats->set_num_fetched_hits_lite_index(1);
+ exp_parent_search_stats->set_num_fetched_hits_main_index(2);
+ exp_parent_search_stats->set_num_fetched_hits_integer_index(0);
+
+ QueryStatsProto::SearchStats* exp_child_search_stats =
+ exp_stats.mutable_child_search_stats();
+ exp_child_search_stats->set_query_length(12);
+ exp_child_search_stats->set_num_terms(1);
+ exp_child_search_stats->set_num_namespaces_filtered(0);
+ exp_child_search_stats->set_num_schema_types_filtered(0);
+ exp_child_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ exp_child_search_stats->set_num_documents_scored(4);
+ exp_child_search_stats->set_parse_query_latency_ms(5);
+ exp_child_search_stats->set_scoring_latency_ms(5);
+ exp_child_search_stats->set_num_fetched_hits_lite_index(1);
+ exp_child_search_stats->set_num_fetched_hits_main_index(3);
+ exp_child_search_stats->set_num_fetched_hits_integer_index(0);
+
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
// Second page, 1 child doc.
@@ -4979,6 +5369,166 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
EqualsSearchResultIgnoreStatsAndScores(expected_result3));
}
+TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedIdMultipleNamespaces) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace1", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace2", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace1#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace1#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace2#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Since we:
+ // - Use COUNT for aggregation scoring strategy.
+ // - (Default) use DESC as the ranking order.
+ //
+ // pkg$db/namespace1#person + email1, email2 should have the highest
+ // aggregated score (2) and be returned first. pkg$db/namespace2#person +
+ // email3 (aggregated score = 1) should be the second.
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+  SearchResultProto::ResultProto* result_proto2 =
+ expected_result2.mutable_results()->Add();
+  *result_proto2->mutable_document() = person2;
+  *result_proto2->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ EXPECT_THAT(result1,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result2,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+}
+
TEST_P(IcingSearchEngineSearchTest,
JoinShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
SchemaProto schema =
@@ -5990,6 +6540,126 @@ TEST_F(IcingSearchEngineSearchTest, NumericFilterOldQueryFails) {
EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
+// Verifies that a numeric-filter query ("price < 20") populates
+// QueryStatsProto — both the deprecated flat fields and the nested
+// parent_search_stats — with the expected counts and latencies.
+TEST_F(IcingSearchEngineSearchTest, NumericFilterQueryStatsProtoTest) {
+  // A fixed fake timer makes every *_latency_ms stat deterministically 5.
+  auto fake_clock = std::make_unique<FakeClock>();
+  fake_clock->SetTimerElapsedMilliseconds(5);
+
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::make_unique<Filesystem>(),
+                              std::make_unique<IcingFilesystem>(),
+                              std::move(fake_clock), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // Create the schema and document store
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("transaction")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("price")
+                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("cost")
+                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+  DocumentProto document_one = DocumentBuilder()
+                                   .SetKey("namespace", "1")
+                                   .SetSchema("transaction")
+                                   .SetCreationTimestampMs(1)
+                                   .AddInt64Property("price", 10)
+                                   .Build();
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two = DocumentBuilder()
+                                   .SetKey("namespace", "2")
+                                   .SetSchema("transaction")
+                                   .SetCreationTimestampMs(2)
+                                   .AddInt64Property("price", 25)
+                                   .Build();
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  DocumentProto document_three = DocumentBuilder()
+                                   .SetKey("namespace", "3")
+                                   .SetSchema("transaction")
+                                   .SetCreationTimestampMs(3)
+                                   .AddInt64Property("cost", 2)
+                                   .Build();
+  ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+
+  // Note: shares key ("namespace", "3") with document_three.
+  DocumentProto document_four = DocumentBuilder()
+                                   .SetKey("namespace", "3")
+                                   .SetSchema("transaction")
+                                   .SetCreationTimestampMs(4)
+                                   .AddInt64Property("price", 15)
+                                   .Build();
+  ASSERT_THAT(icing.Put(document_four).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.add_namespace_filters("namespace");
+  search_spec.add_schema_type_filters(document_one.schema());
+  search_spec.set_query("price < 20");
+  search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+  ResultSpecProto result_spec;
+  result_spec.set_num_per_page(5);
+
+  ScoringSpecProto scoring_spec;
+  scoring_spec.set_rank_by(
+      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+  // Only document_four (price 15) and document_one (price 10) satisfy
+  // "price < 20"; CREATION_TIMESTAMP ranking puts document_four first.
+  SearchResultProto results =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  ASSERT_THAT(results.results(), SizeIs(2));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document_four));
+  EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+
+  // Check the stats
+  // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+  QueryStatsProto exp_stats;
+  exp_stats.set_query_length(10);
+  exp_stats.set_num_terms(0);
+  exp_stats.set_num_namespaces_filtered(1);
+  exp_stats.set_num_schema_types_filtered(1);
+  exp_stats.set_ranking_strategy(
+      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+  exp_stats.set_is_first_page(true);
+  exp_stats.set_requested_page_size(5);
+  exp_stats.set_num_results_returned_current_page(2);
+  exp_stats.set_num_documents_scored(2);
+  exp_stats.set_num_results_with_snippets(0);
+  exp_stats.set_latency_ms(5);
+  exp_stats.set_parse_query_latency_ms(5);
+  exp_stats.set_scoring_latency_ms(5);
+  exp_stats.set_ranking_latency_ms(5);
+  exp_stats.set_document_retrieval_latency_ms(5);
+  exp_stats.set_lock_acquisition_latency_ms(5);
+  exp_stats.set_num_joined_results_returned_current_page(0);
+
+  QueryStatsProto::SearchStats* exp_parent_search_stats =
+      exp_stats.mutable_parent_search_stats();
+  exp_parent_search_stats->set_query_length(10);
+  exp_parent_search_stats->set_num_terms(0);
+  exp_parent_search_stats->set_num_namespaces_filtered(1);
+  exp_parent_search_stats->set_num_schema_types_filtered(1);
+  exp_parent_search_stats->set_ranking_strategy(
+      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+  exp_parent_search_stats->set_is_numeric_query(true);
+  exp_parent_search_stats->set_num_documents_scored(2);
+  exp_parent_search_stats->set_parse_query_latency_ms(5);
+  exp_parent_search_stats->set_scoring_latency_ms(5);
+  exp_parent_search_stats->set_num_fetched_hits_lite_index(0);
+  exp_parent_search_stats->set_num_fetched_hits_main_index(0);
+  // Since we will inspect 1 bucket from "price" in integer index and it
+  // contains 3 hits, we will fetch 3 hits (but filter out one of them).
+  exp_parent_search_stats->set_num_fetched_hits_integer_index(3);
+
+  EXPECT_THAT(results.query_stats(), EqualsProto(exp_stats));
+}
+
TEST_P(IcingSearchEngineSearchTest, BarisNormalizationTest) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -6188,6 +6858,310 @@ TEST_P(IcingSearchEngineSearchTest,
}
}
+// Verifies that hasProperty() matches exactly the documents that contain the
+// named property when build_property_existence_metadata_hits is enabled.
+TEST_P(IcingSearchEngineSearchTest, HasPropertyQuery) {
+  if (GetParam() !=
+      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+    GTEST_SKIP()
+        << "The hasProperty() function is only supported in advanced query.";
+  }
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Value")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("body")
+                                        .SetDataTypeString(TERM_MATCH_EXACT,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_REPEATED))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("timestamp")
+                                        .SetDataType(TYPE_INT64)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("score")
+                                        .SetDataType(TYPE_DOUBLE)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  // Create a document with every property.
+  DocumentProto document0 = DocumentBuilder()
+                                .SetKey("icing", "uri0")
+                                .SetSchema("Value")
+                                .SetCreationTimestampMs(1)
+                                .AddStringProperty("body", "foo")
+                                .AddInt64Property("timestamp", 123)
+                                .AddDoubleProperty("score", 456.789)
+                                .Build();
+  // Create a document with missing body.
+  DocumentProto document1 = DocumentBuilder()
+                                .SetKey("icing", "uri1")
+                                .SetSchema("Value")
+                                .SetCreationTimestampMs(1)
+                                .AddInt64Property("timestamp", 123)
+                                .AddDoubleProperty("score", 456.789)
+                                .Build();
+  // Create a document with missing timestamp.
+  DocumentProto document2 = DocumentBuilder()
+                                .SetKey("icing", "uri2")
+                                .SetSchema("Value")
+                                .SetCreationTimestampMs(1)
+                                .AddStringProperty("body", "foo")
+                                .AddDoubleProperty("score", 456.789)
+                                .Build();
+
+  // hasProperty() requires property-existence metadata hits to be built.
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_build_property_existence_metadata_hits(true);
+  IcingSearchEngine icing(options, GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Get all documents that have "body".
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_search_type(GetParam());
+  search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+  search_spec.add_enabled_features(
+      std::string(kListFilterQueryLanguageFeature));
+  search_spec.set_query("hasProperty(\"body\")");
+  SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                                           ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(2));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+  EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+
+  // Get all documents that have "timestamp".
+  search_spec.set_query("hasProperty(\"timestamp\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(2));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
+  EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+
+  // Get all documents that have "score".
+  search_spec.set_query("hasProperty(\"score\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(3));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+  EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
+  EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
+}
+
+// Verifies that hasProperty() matches nothing when
+// build_property_existence_metadata_hits is disabled, even for documents that
+// do contain the queried property.
+TEST_P(IcingSearchEngineSearchTest,
+       HasPropertyQueryDoesNotWorkWithoutMetadataHits) {
+  if (GetParam() !=
+      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+    GTEST_SKIP()
+        << "The hasProperty() function is only supported in advanced query.";
+  }
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Value")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("body")
+                                        .SetDataTypeString(TERM_MATCH_EXACT,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_REPEATED))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("timestamp")
+                                        .SetDataType(TYPE_INT64)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("score")
+                                        .SetDataType(TYPE_DOUBLE)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  // Create a document with every property.
+  DocumentProto document0 = DocumentBuilder()
+                                .SetKey("icing", "uri0")
+                                .SetSchema("Value")
+                                .SetCreationTimestampMs(1)
+                                .AddStringProperty("body", "foo")
+                                .AddInt64Property("timestamp", 123)
+                                .AddDoubleProperty("score", 456.789)
+                                .Build();
+  // Create a document with missing body.
+  DocumentProto document1 = DocumentBuilder()
+                                .SetKey("icing", "uri1")
+                                .SetSchema("Value")
+                                .SetCreationTimestampMs(1)
+                                .AddInt64Property("timestamp", 123)
+                                .AddDoubleProperty("score", 456.789)
+                                .Build();
+  // Create a document with missing timestamp.
+  DocumentProto document2 = DocumentBuilder()
+                                .SetKey("icing", "uri2")
+                                .SetSchema("Value")
+                                .SetCreationTimestampMs(1)
+                                .AddStringProperty("body", "foo")
+                                .AddDoubleProperty("score", 456.789)
+                                .Build();
+
+  // Metadata hits disabled: the only difference from the positive test above.
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_build_property_existence_metadata_hits(false);
+  IcingSearchEngine icing(options, GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Check that none of the following hasProperty queries can return any
+  // results.
+  //
+  // Get all documents that have "body".
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_search_type(GetParam());
+  search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+  search_spec.add_enabled_features(
+      std::string(kListFilterQueryLanguageFeature));
+  search_spec.set_query("hasProperty(\"body\")");
+  SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                                           ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), IsEmpty());
+
+  // Get all documents that have "timestamp".
+  search_spec.set_query("hasProperty(\"timestamp\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), IsEmpty());
+
+  // Get all documents that have "score".
+  search_spec.set_query("hasProperty(\"score\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), IsEmpty());
+}
+
+// Verifies hasProperty() on a nested document: fully-qualified nested paths
+// ("value.body", "value.score") match, while bare leaf names ("body",
+// "score") do not.
+TEST_P(IcingSearchEngineSearchTest, HasPropertyQueryNestedDocument) {
+  if (GetParam() !=
+      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+    GTEST_SKIP()
+        << "The hasProperty() function is only supported in advanced query.";
+  }
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Value")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("body")
+                                        .SetDataTypeString(TERM_MATCH_EXACT,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_REPEATED))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("timestamp")
+                                        .SetDataType(TYPE_INT64)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("score")
+                                        .SetDataType(TYPE_DOUBLE)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("TreeNode")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("name")
+                                        .SetDataTypeString(TERM_MATCH_EXACT,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(
+                           PropertyConfigBuilder()
+                               .SetName("value")
+                               .SetDataTypeDocument(
+                                   "Value", /*index_nested_properties=*/true)
+                               .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  // Create a complex nested root_document with the following property paths.
+  // - name
+  // - value
+  // - value.body
+  // - value.score
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "uri")
+          .SetSchema("TreeNode")
+          .SetCreationTimestampMs(1)
+          .AddStringProperty("name", "root")
+          .AddDocumentProperty("value", DocumentBuilder()
+                                            .SetKey("icing", "uri")
+                                            .SetSchema("Value")
+                                            .AddStringProperty("body", "foo")
+                                            .AddDoubleProperty("score", 456.789)
+                                            .Build())
+          .Build();
+
+  // hasProperty() requires property-existence metadata hits to be built.
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_build_property_existence_metadata_hits(true);
+  IcingSearchEngine icing(options, GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  // Check that the document can be found by `hasProperty("name")`.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_search_type(GetParam());
+  search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+  search_spec.add_enabled_features(
+      std::string(kListFilterQueryLanguageFeature));
+  search_spec.set_query("hasProperty(\"name\")");
+  SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                                           ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(1));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+  // Check that the document can be found by `hasProperty("value")`.
+  search_spec.set_query("hasProperty(\"value\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(1));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+  // Check that the document can be found by `hasProperty("value.body")`.
+  search_spec.set_query("hasProperty(\"value.body\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(1));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+  // Check that the document can be found by `hasProperty("value.score")`.
+  search_spec.set_query("hasProperty(\"value.score\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(1));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+  // Check that the document can NOT be found by `hasProperty("body")`.
+  search_spec.set_query("hasProperty(\"body\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), IsEmpty());
+
+  // Check that the document can NOT be found by `hasProperty("score")`.
+  search_spec.set_query("hasProperty(\"score\")");
+  results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                         ResultSpecProto::default_instance());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), IsEmpty());
+}
+
INSTANTIATE_TEST_SUITE_P(
IcingSearchEngineSearchTest, IcingSearchEngineSearchTest,
testing::Values(
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 8766f0b..8f5e319 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -12,13 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <cstdint>
+#include <limits>
#include <memory>
+#include <string>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
+#include "third_party/absl/flags/flag.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/data-indexing-handler.h"
@@ -27,11 +31,11 @@
#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
+#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
@@ -40,7 +44,9 @@
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
#include "unicode/uloc.h"
@@ -189,16 +195,17 @@ libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
CreateDataIndexingHandlers(const Clock* clock, const Normalizer* normalizer,
Index* index, NumericIndex<int64_t>* integer_index) {
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(clock, normalizer, index));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ clock, normalizer, index,
+ /*build_property_existence_metadata_hits=*/true));
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<IntegerSectionIndexingHandler>
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(clock, integer_index));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
return handlers;
}
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index ba4ece3..3d1be68 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -30,16 +30,19 @@
#include "icing/absl_ports/str_join.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
#include "icing/index/index.h"
#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/index/term-property-id.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -50,7 +53,6 @@
#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
@@ -64,6 +66,7 @@
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/crc32.h"
#include "icing/util/tokenized-document.h"
#include "unicode/uloc.h"
@@ -180,11 +183,11 @@ class IndexProcessorTest : public Test {
IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv=*/false));
- ICING_ASSERT_OK_AND_ASSIGN(
- qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdJoinIndexImplV1::Create(
+ filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -293,10 +296,10 @@ class IndexProcessorTest : public Test {
doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index_.get()));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(
@@ -304,10 +307,10 @@ class IndexProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler>
qualified_id_join_indexing_handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
handlers.push_back(std::move(qualified_id_join_indexing_handler));
@@ -634,12 +637,13 @@ TEST_F(IndexProcessorTest, TooLongTokens) {
normalizer_factory::Create(
/*max_term_byte_size=*/4));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(
- &fake_clock_, normalizer.get(), index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
index_processor_ =
std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
@@ -824,20 +828,21 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index_.get()));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
- qualified_id_join_indexing_handler,
- QualifiedIdJoinIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
handlers.push_back(std::move(qualified_id_join_indexing_handler));
@@ -979,12 +984,12 @@ TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) {
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index_.get()));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
index_processor_ =
std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
@@ -1045,12 +1050,12 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) {
Index::Create(options, &filesystem_, mock_icing_filesystem_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index_.get()));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
index_processor_ =
std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 31dcc7e..98058be 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -65,10 +65,10 @@ libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions(
"Requested hit buffer size %d is too large.",
options.index_merge_size));
}
- return LiteIndex::Options(options.base_dir + "/idx/lite.",
- options.index_merge_size,
- options.lite_index_sort_at_indexing,
- options.lite_index_sort_size);
+ return LiteIndex::Options(
+ options.base_dir + "/idx/lite.", options.index_merge_size,
+ options.lite_index_sort_at_indexing, options.lite_index_sort_size,
+ options.include_property_existence_metadata_hits);
}
std::string MakeMainIndexFilepath(const std::string& base_dir) {
diff --git a/icing/index/index.h b/icing/index/index.h
index 32ea97b..a5d75c4 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -72,16 +72,20 @@ class Index {
struct Options {
explicit Options(const std::string& base_dir, uint32_t index_merge_size,
bool lite_index_sort_at_indexing,
- uint32_t lite_index_sort_size)
+ uint32_t lite_index_sort_size,
+ bool include_property_existence_metadata_hits = false)
: base_dir(base_dir),
index_merge_size(index_merge_size),
lite_index_sort_at_indexing(lite_index_sort_at_indexing),
- lite_index_sort_size(lite_index_sort_size) {}
+ lite_index_sort_size(lite_index_sort_size),
+ include_property_existence_metadata_hits(
+ include_property_existence_metadata_hits) {}
std::string base_dir;
int32_t index_merge_size;
bool lite_index_sort_at_indexing;
int32_t lite_index_sort_size;
+ bool include_property_existence_metadata_hits;
};
// Creates an instance of Index in the directory pointed by file_dir.
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index b823535..04a6bb7 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -283,6 +283,228 @@ TEST_F(IndexTest, AdvancePastEndAfterMerge) {
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
}
+TEST_F(IndexTest, IteratorGetCallStats_mainIndexOnly) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Merge the index.
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Before Advance().
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 1st Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 2nd Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 3rd Advance().
+ ASSERT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+}
+
+TEST_F(IndexTest, IteratorGetCallStats_liteIndexOnly) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Before Advance().
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 1st Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/1,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 2nd Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 3rd Advance().
+ ASSERT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+}
+
+TEST_F(IndexTest, IteratorGetCallStats) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Merge the index. 2 hits for "foo" will be merged into the main index.
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Insert 2 more hits for "foo". It will be in the lite index.
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Before Advance().
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 1st Advance(). DocHitInfoIteratorOr will advance both left and right
+ // iterator (i.e. lite and main index iterator) once, compare document ids,
+ // and return the hit with larger document id. In this case, hit from lite
+ // index will be chosen and returned.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/1,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 2nd Advance(). Since lite index iterator has larger document id in the
+ // previous round, we advance lite index iterator in this round. We still
+ // choose and return hit from lite index.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 3rd Advance(). Since lite index iterator has larger document id in the
+ // previous round, we advance lite index iterator in this round. However,
+ // there is no hit from lite index anymore, so we choose and return hit from
+ // main index.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 4th Advance(). Advance main index.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 5th Advance(). Reach the end.
+ ASSERT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+}
+
TEST_F(IndexTest, SingleHitSingleTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
index 67c7d25..1917fd0 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
@@ -32,7 +32,6 @@ libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() {
if (!IsDocumentIdValid(current_document_id_)) {
// Reached the end, set these to invalid values and return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
index bb16eaf..60c5e0c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
@@ -37,10 +37,16 @@ class DocHitInfoIteratorAllDocumentId : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override { return 0; }
+ void MapChildren(const ChildrenMapper& mapper) override {}
- int32_t GetNumLeafAdvanceCalls() const override {
- return document_id_limit_ - current_document_id_;
+ CallStats GetCallStats() const override {
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/document_id_limit_ -
+ current_document_id_,
+ /*num_blocks_inspected_in=*/0);
}
std::string ToString() const override {
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
index ea2dda6..379cb4d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
@@ -41,9 +41,8 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
// We'll always start with an invalid document_id, need to Advance before we
// get anything out of this.
- EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(kInvalidDocumentId));
- EXPECT_THAT(all_it.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(all_it.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
{
@@ -54,26 +53,25 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
}
}
-TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumBlocksInspected) {
+TEST(DocHitInfoIteratorAllDocumentIdTest, GetCallStats) {
DocHitInfoIteratorAllDocumentId all_it(100);
- EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0));
-
- // Number of iterations is chosen arbitrarily. Just meant to demonstrate that
- // no matter how many Advance calls are made, GetNumBlocksInspected should
- // always return 0.
- for (int i = 0; i < 5; ++i) {
- EXPECT_THAT(all_it.Advance(), IsOk());
- EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0));
- }
-}
-
-TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumLeafAdvanceCalls) {
- DocHitInfoIteratorAllDocumentId all_it(100);
- EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(0));
+ EXPECT_THAT(
+ all_it.GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
for (int i = 1; i <= 5; ++i) {
EXPECT_THAT(all_it.Advance(), IsOk());
- EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(i));
+ EXPECT_THAT(
+ all_it.GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/i, /*num_blocks_inspected=*/0));
}
}
@@ -87,12 +85,8 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) {
// Test one advance
DocHitInfoIteratorAllDocumentId all_it(5);
EXPECT_THAT(all_it.Advance(), IsOk());
- EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(5));
-
- // Advancing shouldn't affect the intersect section ids mask, since there's
- // no intersecting going on
- EXPECT_THAT(all_it.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(all_it.doc_hit_info(),
+ EqualsDocHitInfo(5, std::vector<SectionId>{}));
}
{
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index 185a35e..249bd0e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -83,7 +83,6 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
// Didn't find anything for the first iterator, reset to invalid values and
// return.
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -106,8 +105,6 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
// Guaranteed that short_doc_id and long_doc_id match now
doc_hit_info_ = short_->doc_hit_info();
doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask());
- hit_intersect_section_ids_mask_ = short_->hit_intersect_section_ids_mask() &
- long_->hit_intersect_section_ids_mask();
return libtextclassifier3::Status::OK;
}
@@ -124,14 +121,6 @@ DocHitInfoIteratorAnd::TrimRightMostNode() && {
return trimmed_long;
}
-int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
- return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorAnd::GetNumLeafAdvanceCalls() const {
- return short_->GetNumLeafAdvanceCalls() + long_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorAnd::ToString() const {
return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(),
")");
@@ -152,7 +141,6 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
// Didn't find anything for the first iterator, reset to invalid values and
// return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -196,14 +184,10 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
// Found a DocumentId which exists in all the iterators
doc_hit_info_ = iterators_.at(0)->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- iterators_.at(0)->hit_intersect_section_ids_mask();
for (size_t i = 1; i < iterators_.size(); i++) {
doc_hit_info_.MergeSectionsFrom(
iterators_.at(i)->doc_hit_info().hit_section_ids_mask());
- hit_intersect_section_ids_mask_ &=
- iterators_.at(i)->hit_intersect_section_ids_mask();
}
return libtextclassifier3::Status::OK;
}
@@ -229,20 +213,12 @@ DocHitInfoIteratorAndNary::TrimRightMostNode() && {
return trimmed_right;
}
-int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
- int32_t blockCount = 0;
- for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
- blockCount += iter->GetNumBlocksInspected();
- }
- return blockCount;
-}
-
-int32_t DocHitInfoIteratorAndNary::GetNumLeafAdvanceCalls() const {
- int32_t leafCount = 0;
- for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
- leafCount += iter->GetNumLeafAdvanceCalls();
+DocHitInfoIterator::CallStats DocHitInfoIteratorAndNary::GetCallStats() const {
+ CallStats call_stats;
+ for (const auto& iter : iterators_) {
+ call_stats += iter->GetCallStats();
}
- return leafCount;
+ return call_stats;
}
std::string DocHitInfoIteratorAndNary::ToString() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index 0f40f94..8c52ac9 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -18,6 +18,7 @@
#include <cstdint>
#include <memory>
#include <string>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -42,14 +43,19 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
-
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override {
+ return short_->GetCallStats() + long_->GetCallStats();
+ }
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper& mapper) override {
+ short_ = mapper(std::move(short_));
+ long_ = mapper(std::move(long_));
+ }
+
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats,
+ std::vector<TermMatchInfo>* matched_terms_stats,
SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
@@ -78,14 +84,18 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
-
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override;
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper& mapper) override {
+ for (int i = 0; i < iterators_.size(); ++i) {
+ iterators_[i] = mapper(std::move(iterators_[i]));
+ }
+ }
+
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats,
+ std::vector<TermMatchInfo>* matched_terms_stats,
SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
index 51828cb..f204ada 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
@@ -74,39 +74,33 @@ TEST(DocHitInfoIteratorAndTest, Initialize) {
std::make_unique<DocHitInfoIteratorDummy>());
// We start out with invalid values
- EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(and_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
-TEST(DocHitInfoIteratorAndTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorAndTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
+ second_iter->SetCallStats(second_iter_call_stats);
DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
- EXPECT_THAT(and_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorAndTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
-
- EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves));
+ EXPECT_THAT(and_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats));
}
TEST(DocHitInfoIteratorAndTest, AdvanceNoOverlap) {
@@ -293,24 +287,22 @@ TEST(DocHitInfoIteratorAndTest, SectionIdMask) {
// Created to test correct section_id_mask behavior.
SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01010111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
ICING_EXPECT_OK(and_iter.Advance());
EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
}
TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) {
@@ -340,11 +332,11 @@ TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) {
auto first_iter =
std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter =
std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorAnd and_iter(std::move(first_iter),
std::move(second_iter));
@@ -380,11 +372,11 @@ TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) {
auto first_iter =
std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter =
std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hi");
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ second_iter->set_hit_section_ids_mask(section_id_mask1);
DocHitInfoIteratorAnd and_iter(std::move(first_iter),
std::move(second_iter));
@@ -435,9 +427,8 @@ TEST(DocHitInfoIteratorAndNaryTest, Initialize) {
DocHitInfoIteratorAndNary and_iter(std::move(iterators));
// We start out with invalid values
- EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(and_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) {
@@ -450,51 +441,42 @@ TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(DocHitInfoIteratorAndNaryTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorAndNaryTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
-
- int third_iter_blocks = 13; // arbitrary value
+ second_iter->SetCallStats(second_iter_call_stats);
+
+ DocHitInfoIterator::CallStats third_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/1000,
+ /*num_leaf_advance_calls_main_index_in=*/2000,
+ /*num_leaf_advance_calls_integer_index_in=*/3000,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/200); // arbitrary value
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumBlocksInspected(third_iter_blocks);
-
- int fourth_iter_blocks = 1; // arbitrary value
- auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumBlocksInspected(fourth_iter_blocks);
-
- std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
- iterators.push_back(std::move(first_iter));
- iterators.push_back(std::move(second_iter));
- iterators.push_back(std::move(third_iter));
- iterators.push_back(std::move(fourth_iter));
- DocHitInfoIteratorAndNary and_iter(std::move(iterators));
-
- EXPECT_THAT(and_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks +
- fourth_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorAndNaryTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- int third_iter_leaves = 13; // arbitrary value
- auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumLeafAdvanceCalls(third_iter_leaves);
-
- int fourth_iter_leaves = 13; // arbitrary value
+ third_iter->SetCallStats(third_iter_call_stats);
+
+ DocHitInfoIterator::CallStats fourth_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/200,
+ /*num_leaf_advance_calls_main_index_in=*/400,
+ /*num_leaf_advance_calls_integer_index_in=*/100,
+ /*num_leaf_advance_calls_no_index_in=*/20,
+ /*num_blocks_inspected_in=*/50); // arbitrary value
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves);
+ fourth_iter->SetCallStats(fourth_iter_call_stats);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -503,9 +485,9 @@ TEST(DocHitInfoIteratorAndNaryTest, GetNumLeafAdvanceCalls) {
iterators.push_back(std::move(fourth_iter));
DocHitInfoIteratorAndNary and_iter(std::move(iterators));
- EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves +
- fourth_iter_leaves));
+ EXPECT_THAT(and_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats +
+ third_iter_call_stats + fourth_iter_call_stats));
}
TEST(DocHitInfoIteratorAndNaryTest, Advance) {
@@ -541,7 +523,6 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3
SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01101111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
@@ -550,16 +531,16 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4, section_id_mask4)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>(third_vector);
- third_iter->set_hit_intersect_section_ids_mask(section_id_mask3);
+ third_iter->set_hit_section_ids_mask(section_id_mask3);
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>(fourth_vector);
- fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4);
+ fourth_iter->set_hit_section_ids_mask(section_id_mask4);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -572,7 +553,6 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
ICING_EXPECT_OK(and_iter.Advance());
EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
}
TEST(DocHitInfoIteratorAndNaryTest, PopulateMatchedTermsStats) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index a82e556..82d1ac7 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -100,14 +100,11 @@ libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
// Satisfied all our specified filters
doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- delegate_->hit_intersect_section_ids_mask();
return libtextclassifier3::Status::OK;
}
// Didn't find anything on the delegate iterator.
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
@@ -123,14 +120,6 @@ DocHitInfoIteratorFilter::TrimRightMostNode() && {
return trimmed_delegate;
}
-int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
- return delegate_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorFilter::GetNumLeafAdvanceCalls() const {
- return delegate_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorFilter::ToString() const {
return delegate_->ToString();
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index be5e1e8..608665e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -20,6 +20,7 @@
#include <string>
#include <string_view>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -62,9 +63,11 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
+ void MapChildren(const ChildrenMapper& mapper) override {
+ delegate_ = mapper(std::move(delegate_));
+ }
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
std::string ToString() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index d93fd02..0ed4d02 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -1001,28 +1001,22 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) {
EqualsDocHitInfo(document_id3, section_ids3)));
}
-TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) {
+TEST_F(DocHitInfoIteratorFilterTest, GetCallStats) {
+ DocHitInfoIterator::CallStats original_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumBlocksInspected(5);
+ original_iterator->SetCallStats(original_call_stats);
DocHitInfoIteratorFilter::Options options;
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
options, fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5));
-}
-
-TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) {
- auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumLeafAdvanceCalls(6);
-
- DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- options, fake_clock_.GetSystemTimeMilliseconds());
-
- EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
+ EXPECT_THAT(filtered_iterator.GetCallStats(), Eq(original_call_stats));
}
TEST_F(DocHitInfoIteratorFilterTest, TrimFilterIterator) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-none.h b/icing/index/iterator/doc-hit-info-iterator-none.h
index f938d32..c2853f1 100644
--- a/icing/index/iterator/doc-hit-info-iterator-none.h
+++ b/icing/index/iterator/doc-hit-info-iterator-none.h
@@ -39,9 +39,9 @@ class DocHitInfoIteratorNone : public DocHitInfoIterator {
return node;
}
- int32_t GetNumBlocksInspected() const override { return 0; }
+ void MapChildren(const ChildrenMapper& mapper) override {}
- int32_t GetNumLeafAdvanceCalls() const override { return 0; }
+ CallStats GetCallStats() const override { return CallStats(); }
std::string ToString() const override { return "(NONE)"; }
};
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index 38b1ded..10a8292 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -15,13 +15,15 @@
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include <cstdint>
+#include <memory>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
-#include "icing/schema/section.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/store/document-id.h"
namespace icing {
@@ -67,14 +69,8 @@ DocHitInfoIteratorNot::TrimRightMostNode() && {
"Cannot generate suggestion if the last term is NOT operator.");
}
-int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
- return to_be_excluded_->GetNumBlocksInspected() +
- all_document_id_iterator_.GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorNot::GetNumLeafAdvanceCalls() const {
- return to_be_excluded_->GetNumLeafAdvanceCalls() +
- all_document_id_iterator_.GetNumLeafAdvanceCalls();
+void DocHitInfoIteratorNot::MapChildren(const ChildrenMapper& mapper) {
+ to_be_excluded_ = mapper(std::move(to_be_excluded_));
}
std::string DocHitInfoIteratorNot::ToString() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.h b/icing/index/iterator/doc-hit-info-iterator-not.h
index 8cc3bf3..11575fb 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.h
+++ b/icing/index/iterator/doc-hit-info-iterator-not.h
@@ -30,14 +30,12 @@ namespace lib {
// Iterator that will return all documents that are *not* specified by the
// to_be_excluded_iterator.
//
-// NOTE: The hit_intersect_section_ids_mask is meaningless for this iterator.
+// NOTE: doc_hit_info_.hit_section_ids_mask() is meaningless for this iterator.
// When this iterator produces a result, it's because the Document was not
// present in the to_be_excluded_iterator. There is no concept of the Document
// having been chosen because it's term was in a specific section. Since we
// don't know anything about the sections for the Document, the
-// hit_intersect_section_ids_mask is always kSectionIdMaskNone. Correspondingly,
-// this means that the doc_hit_info.hit_section_ids_mask will also always be
-// kSectionIdMaskNone.
+// doc_hit_info.hit_section_ids_mask() is always kSectionIdMaskNone.
class DocHitInfoIteratorNot : public DocHitInfoIterator {
public:
// to_be_excluded_iterator: The results of this iterator will be excluded
@@ -55,9 +53,12 @@ class DocHitInfoIteratorNot : public DocHitInfoIterator {
// to NOT operator.
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
+ void MapChildren(const ChildrenMapper& mapper) override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override {
+ return to_be_excluded_->GetCallStats() +
+ all_document_id_iterator_.GetCallStats();
+ }
std::string ToString() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
index 5a8ce2c..a8c835f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
@@ -102,40 +102,39 @@ TEST(DocHitInfoIteratorNotTest, AllDocumentIdOverlapOk) {
EXPECT_THAT(GetDocumentIds(&not_iterator), IsEmpty());
}
-TEST(DocHitInfoIteratorNotTest, GetNumBlocksInspected) {
- int to_be_excluded_iterator_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorNotTest, GetCallStats) {
+ DocHitInfoIterator::CallStats to_be_excluded_iterator_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto to_be_excluded_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- to_be_excluded_iterator->SetNumBlocksInspected(
- to_be_excluded_iterator_blocks);
-
- DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
- /*document_id_limit=*/5);
-
- // The AllDocumentId iterator doesn't count any blocks as being inspected
- // since it's just decrementing 1 from the document_id_limit.
- EXPECT_THAT(not_iterator.GetNumBlocksInspected(),
- Eq(to_be_excluded_iterator_blocks));
-}
-
-TEST(DocHitInfoIteratorNotTest, GetNumLeafAdvanceCalls) {
- int to_be_excluded_iterator_leaves = 4; // arbitrary value
- auto to_be_excluded_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- to_be_excluded_iterator->SetNumLeafAdvanceCalls(
- to_be_excluded_iterator_leaves);
+ to_be_excluded_iterator->SetCallStats(to_be_excluded_iterator_call_stats);
int all_document_id_limit = 5;
// Since we iterate from [limit, 0] inclusive, add 1 for the 0th advance call
int all_leaf_advance_calls = all_document_id_limit + 1;
DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
- all_document_id_limit);
+ /*document_id_limit=*/5);
while (not_iterator.Advance().ok()) {
// Advance through the whole not iterator
}
- // The AllDocumentId iterator counts each DocumentId as a leaf advance call
- EXPECT_THAT(not_iterator.GetNumLeafAdvanceCalls(),
- Eq(to_be_excluded_iterator_leaves + all_leaf_advance_calls));
+ // The AllDocumentId iterator doesn't count lite/main/integer index or blocks
+ // as being inspected since it's just decrementing 1 from the
+ // document_id_limit.
+ EXPECT_THAT(
+ not_iterator.GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ to_be_excluded_iterator_call_stats.num_leaf_advance_calls_lite_index,
+ to_be_excluded_iterator_call_stats.num_leaf_advance_calls_main_index,
+ to_be_excluded_iterator_call_stats
+ .num_leaf_advance_calls_integer_index,
+ to_be_excluded_iterator_call_stats.num_leaf_advance_calls_no_index +
+ all_leaf_advance_calls,
+ to_be_excluded_iterator_call_stats.num_blocks_inspected));
}
TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
index 8f7b84f..6251365 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -20,6 +20,7 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/store/document-id.h"
#include "icing/util/status-macros.h"
@@ -113,7 +114,6 @@ libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
right_document_id_ == kInvalidDocumentId) {
// Reached the end, set these to invalid values and return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -132,26 +132,16 @@ libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
current_ = chosen;
doc_hit_info_ = chosen->doc_hit_info();
- hit_intersect_section_ids_mask_ = chosen->hit_intersect_section_ids_mask();
// If equal, combine.
if (left_document_id_ == right_document_id_) {
doc_hit_info_.MergeSectionsFrom(
right_->doc_hit_info().hit_section_ids_mask());
- hit_intersect_section_ids_mask_ &= right_->hit_intersect_section_ids_mask();
}
return libtextclassifier3::Status::OK;
}
-int32_t DocHitInfoIteratorOr::GetNumBlocksInspected() const {
- return left_->GetNumBlocksInspected() + right_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorOr::GetNumLeafAdvanceCalls() const {
- return left_->GetNumLeafAdvanceCalls() + right_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorOr::ToString() const {
return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(),
")");
@@ -192,7 +182,6 @@ libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
// 0 is the smallest (last) DocumentId, can't advance further. Reset to
// invalid values and return directly
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -222,45 +211,31 @@ libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
// None of the iterators had a next document_id, reset to invalid values and
// return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
// Found the next hit DocumentId, now calculate the section info.
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
for (const auto& iterator : iterators_) {
if (iterator->doc_hit_info().document_id() == next_document_id) {
current_iterators_.push_back(iterator.get());
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
doc_hit_info_ = iterator->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- iterator->hit_intersect_section_ids_mask();
} else {
doc_hit_info_.MergeSectionsFrom(
iterator->doc_hit_info().hit_section_ids_mask());
- hit_intersect_section_ids_mask_ &=
- iterator->hit_intersect_section_ids_mask();
}
}
}
return libtextclassifier3::Status::OK;
}
-int32_t DocHitInfoIteratorOrNary::GetNumBlocksInspected() const {
- int32_t blockCount = 0;
- for (const auto& iter : iterators_) {
- blockCount += iter->GetNumBlocksInspected();
- }
- return blockCount;
-}
-
-int32_t DocHitInfoIteratorOrNary::GetNumLeafAdvanceCalls() const {
- int32_t leafCount = 0;
+DocHitInfoIterator::CallStats DocHitInfoIteratorOrNary::GetCallStats() const {
+ CallStats call_stats;
for (const auto& iter : iterators_) {
- leafCount += iter->GetNumLeafAdvanceCalls();
+ call_stats += iter->GetCallStats();
}
- return leafCount;
+ return call_stats;
}
std::string DocHitInfoIteratorOrNary::ToString() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 1e9847d..8c0427b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -16,7 +16,9 @@
#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_OR_H_
#include <cstdint>
+#include <memory>
#include <string>
+#include <utility>
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -38,12 +40,17 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
-
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override {
+ return left_->GetCallStats() + right_->GetCallStats();
+ }
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper &mapper) override {
+ left_ = mapper(std::move(left_));
+ right_ = mapper(std::move(right_));
+ }
+
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo> *matched_terms_stats,
SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
@@ -83,12 +90,16 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
-
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override;
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper &mapper) override {
+ for (int i = 0; i < iterators_.size(); ++i) {
+ iterators_[i] = mapper(std::move(iterators_[i]));
+ }
+ }
+
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo> *matched_terms_stats,
SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
index 1950c01..d198b53 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
@@ -73,38 +73,33 @@ TEST(DocHitInfoIteratorOrTest, Initialize) {
std::make_unique<DocHitInfoIteratorDummy>());
// We start out with invalid values
- EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone));
+ EXPECT_THAT(or_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
-TEST(DocHitInfoIteratorOrTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorOrTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
+ second_iter->SetCallStats(second_iter_call_stats);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
- EXPECT_THAT(or_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorOrTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
-
- EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves));
+ EXPECT_THAT(or_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats));
}
TEST(DocHitInfoIteratorOrTest, Advance) {
@@ -155,24 +150,22 @@ TEST(DocHitInfoIteratorOrTest, SectionIdMask) {
// Created to test correct section_id_mask behavior.
SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01010111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
ICING_EXPECT_OK(or_iter.Advance());
EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
}
TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
@@ -200,11 +193,11 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
auto first_iter =
std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter =
std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
std::vector<TermMatchInfo> matched_terms_stats;
@@ -238,11 +231,11 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
auto first_iter =
std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter =
std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hi");
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ second_iter->set_hit_section_ids_mask(section_id_mask1);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
std::vector<TermMatchInfo> matched_terms_stats;
@@ -281,11 +274,11 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
auto first_iter =
std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter =
std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
std::vector<TermMatchInfo> matched_terms_stats;
@@ -362,8 +355,8 @@ TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
DocHitInfoIteratorOrNary or_iter(std::move(iterators));
// We start out with invalid values
- EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone));
+ EXPECT_THAT(or_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) {
@@ -376,51 +369,42 @@ TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(DocHitInfoIteratorOrNaryTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorOrNaryTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
-
- int third_iter_blocks = 13; // arbitrary value
+ second_iter->SetCallStats(second_iter_call_stats);
+
+ DocHitInfoIterator::CallStats third_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/1000,
+ /*num_leaf_advance_calls_main_index_in=*/2000,
+ /*num_leaf_advance_calls_integer_index_in=*/3000,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/200); // arbitrary value
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumBlocksInspected(third_iter_blocks);
-
- int fourth_iter_blocks = 1; // arbitrary value
- auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumBlocksInspected(fourth_iter_blocks);
-
- std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
- iterators.push_back(std::move(first_iter));
- iterators.push_back(std::move(second_iter));
- iterators.push_back(std::move(third_iter));
- iterators.push_back(std::move(fourth_iter));
- DocHitInfoIteratorOrNary or_iter(std::move(iterators));
-
- EXPECT_THAT(or_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks +
- fourth_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- int third_iter_leaves = 13; // arbitrary value
- auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumLeafAdvanceCalls(third_iter_leaves);
-
- int fourth_iter_leaves = 13; // arbitrary value
+ third_iter->SetCallStats(third_iter_call_stats);
+
+ DocHitInfoIterator::CallStats fourth_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/200,
+ /*num_leaf_advance_calls_main_index_in=*/400,
+ /*num_leaf_advance_calls_integer_index_in=*/100,
+ /*num_leaf_advance_calls_no_index_in=*/20,
+ /*num_blocks_inspected_in=*/50); // arbitrary value
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves);
+ fourth_iter->SetCallStats(fourth_iter_call_stats);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -429,9 +413,9 @@ TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) {
iterators.push_back(std::move(fourth_iter));
DocHitInfoIteratorOrNary or_iter(std::move(iterators));
- EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves +
- fourth_iter_leaves));
+ EXPECT_THAT(or_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats +
+ third_iter_call_stats + fourth_iter_call_stats));
}
TEST(DocHitInfoIteratorOrNaryTest, Advance) {
@@ -460,7 +444,6 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3
SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01101111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
@@ -469,16 +452,16 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4, section_id_mask4)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>(third_vector);
- third_iter->set_hit_intersect_section_ids_mask(section_id_mask3);
+ third_iter->set_hit_section_ids_mask(section_id_mask3);
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>(fourth_vector);
- fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4);
+ fourth_iter->set_hit_section_ids_mask(section_id_mask4);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -491,7 +474,6 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
ICING_EXPECT_OK(or_iter.Advance());
EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
}
TEST(DocHitInfoIteratorOrNaryTest, PopulateMatchedTermsStats) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc
new file mode 100644
index 0000000..e6a1c67
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc
@@ -0,0 +1,65 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-document.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorPropertyInDocument::DocHitInfoIteratorPropertyInDocument(
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator)
+ : meta_hit_iterator_(std::move(meta_hit_iterator)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorPropertyInDocument::Advance() {
+ while (meta_hit_iterator_->Advance().ok()) {
+ // Currently, the metadata hits added by PropertyExistenceIndexingHandler
+ // can only have a section id of 0, so the section mask has to be 1 << 0.
+ if (meta_hit_iterator_->doc_hit_info().hit_section_ids_mask() == (1 << 0)) {
+ doc_hit_info_ = meta_hit_iterator_->doc_hit_info();
+ // Hits returned by "hasProperty" should not be associated with any
+ // section.
+ doc_hit_info_.set_hit_section_ids_mask(/*section_id_mask=*/0);
+ return libtextclassifier3::Status::OK;
+ }
+ }
+
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorPropertyInDocument::TrimRightMostNode() && {
+ // Don't generate suggestion if the last operator is this custom function.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is hasProperty().");
+}
+
+std::string DocHitInfoIteratorPropertyInDocument::ToString() const {
+ return meta_hit_iterator_->ToString();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-document.h b/icing/index/iterator/doc-hit-info-iterator-property-in-document.h
new file mode 100644
index 0000000..bb2c97a
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-document.h
@@ -0,0 +1,73 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// The iterator returned by the "hasProperty" function in advanced query that
+// post-processes metadata hits added by PropertyExistenceIndexingHandler.
+// Specifically, it filters out hits that are not recognized as metadata, and
+// always set hit_section_ids_mask to 0.
+//
+// It is marked as a subclass of DocHitInfoLeafIterator because section
+// restriction should not be passed down to meta_hit_iterator.
+class DocHitInfoIteratorPropertyInDocument : public DocHitInfoLeafIterator {
+ public:
+ explicit DocHitInfoIteratorPropertyInDocument(
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator);
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ CallStats GetCallStats() const override {
+ return meta_hit_iterator_->GetCallStats();
+ }
+
+ std::string ToString() const override;
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ meta_hit_iterator_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
index 05778b0..8b98302 100644
--- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
@@ -45,7 +45,6 @@ DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema(
libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() {
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
// Maps from SchemaTypeId to a bool indicating whether or not the type has
// the requested property.
@@ -77,9 +76,6 @@ libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() {
if (valid_match) {
doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- delegate_->hit_intersect_section_ids_mask();
- doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
return libtextclassifier3::Status::OK;
}
@@ -98,14 +94,6 @@ DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && {
"Cannot generate suggestion if the last term is hasPropertyDefined().");
}
-int32_t DocHitInfoIteratorPropertyInSchema::GetNumBlocksInspected() const {
- return delegate_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorPropertyInSchema::GetNumLeafAdvanceCalls() const {
- return delegate_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorPropertyInSchema::ToString() const {
return absl_ports::StrCat("(", absl_ports::StrJoin(target_properties_, ","),
"): ", delegate_->ToString());
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
index 730c497..c16a1c4 100644
--- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -45,9 +46,11 @@ class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
+ void MapChildren(const ChildrenMapper& mapper) override {
+ delegate_ = mapper(std::move(delegate_));
+ }
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
std::string ToString() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
index 47f5cc5..3f5a0a7 100644
--- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
@@ -199,8 +199,7 @@ TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
auto original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
- original_iterator->set_hit_intersect_section_ids_mask(
- original_section_id_mask);
+ original_iterator->set_hit_section_ids_mask(original_section_id_mask);
DocHitInfoIteratorPropertyInSchema property_defined_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
@@ -218,7 +217,7 @@ TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
// The expected mask is the same as the original mask, since the iterator
// should treat it as a pass-through.
SectionIdMask expected_section_id_mask = original_section_id_mask;
- EXPECT_EQ(property_defined_iterator.hit_intersect_section_ids_mask(),
+ EXPECT_EQ(property_defined_iterator.doc_hit_info().hit_section_ids_mask(),
expected_section_id_mask);
property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index b850a9b..35dc0b9 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -16,137 +16,142 @@
#include <cstdint>
#include <memory>
+#include <set>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/iterator/section-restrict-data.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+// An iterator that simply takes ownership of SectionRestrictData.
+class SectionRestrictDataHolderIterator : public DocHitInfoIterator {
+ public:
+ explicit SectionRestrictDataHolderIterator(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ std::unique_ptr<SectionRestrictData> data)
+ : delegate_(std::move(delegate)), data_(std::move(data)) {}
+
+ libtextclassifier3::Status Advance() override {
+ auto result = delegate_->Advance();
+ doc_hit_info_ = delegate_->doc_hit_info();
+ return result;
+ }
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ if (trimmed_delegate.iterator_ != nullptr) {
+ trimmed_delegate.iterator_ =
+ std::make_unique<SectionRestrictDataHolderIterator>(
+ std::move(trimmed_delegate.iterator_), std::move(data_));
+ }
+ return trimmed_delegate;
+ }
+
+ void MapChildren(const ChildrenMapper& mapper) override {
+ delegate_ = mapper(std::move(delegate_));
+ }
+
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
+
+ std::string ToString() const override { return delegate_->ToString(); }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask) const override {
+ return delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> delegate_;
+ std::unique_ptr<SectionRestrictData> data_;
+};
+
DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
+ std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data)
+ : delegate_(std::move(delegate)), data_(data) {}
+
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::set<std::string> target_sections, int64_t current_time_ms)
- : delegate_(std::move(delegate)),
- document_store_(*document_store),
- schema_store_(*schema_store),
- current_time_ms_(current_time_ms) {
- type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] =
+ std::set<std::string> target_sections, int64_t current_time_ms) {
+ std::unordered_map<std::string, std::set<std::string>> type_property_filters;
+ type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] =
std::move(target_sections);
+ auto data = std::make_unique<SectionRestrictData>(
+ document_store, schema_store, current_time_ms, type_property_filters);
+ std::unique_ptr<DocHitInfoIterator> result =
+ ApplyRestrictions(std::move(iterator), data.get());
+ return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
+ std::move(data));
}
-DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const SearchSpecProto& search_spec,
- int64_t current_time_ms)
- : delegate_(std::move(delegate)),
- document_store_(*document_store),
- schema_store_(*schema_store),
- current_time_ms_(current_time_ms) {
+ const SearchSpecProto& search_spec, int64_t current_time_ms) {
+ std::unordered_map<std::string, std::set<std::string>> type_property_filters;
// TODO(b/294274922): Add support for polymorphism in type property filters.
for (const TypePropertyMask& type_property_mask :
- search_spec.type_property_filters()) {
- type_property_filters_[type_property_mask.schema_type()] =
+ search_spec.type_property_filters()) {
+ type_property_filters[type_property_mask.schema_type()] =
std::set<std::string>(type_property_mask.paths().begin(),
type_property_mask.paths().end());
}
+ auto data = std::make_unique<SectionRestrictData>(
+ document_store, schema_store, current_time_ms, type_property_filters);
+ std::unique_ptr<DocHitInfoIterator> result =
+ ApplyRestrictions(std::move(iterator), data.get());
+ return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
+ std::move(data));
}
-DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
- const DocumentStore* document_store, const SchemaStore* schema_store,
- std::unordered_map<std::string, std::set<std::string>>
- type_property_filters,
- std::unordered_map<std::string, SectionIdMask> type_property_masks,
- int64_t current_time_ms)
- : delegate_(std::move(delegate)),
- document_store_(*document_store),
- schema_store_(*schema_store),
- current_time_ms_(current_time_ms),
- type_property_filters_(std::move(type_property_filters)),
- type_property_masks_(std::move(type_property_masks)) {}
-
-SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask(
- const std::string& schema_type,
- const std::set<std::string>& target_sections) const {
- SectionIdMask section_mask = kSectionIdMaskNone;
- auto section_metadata_list_or =
- schema_store_.GetSectionMetadata(schema_type);
- if (!section_metadata_list_or.ok()) {
- // The current schema doesn't have section metadata.
- return kSectionIdMaskNone;
- }
- const std::vector<SectionMetadata>* section_metadata_list =
- section_metadata_list_or.ValueOrDie();
- for (const SectionMetadata& section_metadata : *section_metadata_list) {
- if (target_sections.find(section_metadata.path) !=
- target_sections.end()) {
- section_mask |= UINT64_C(1) << section_metadata.id;
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) {
+ ChildrenMapper mapper;
+ mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator)
+ -> std::unique_ptr<DocHitInfoIterator> {
+ if (iterator->is_leaf()) {
+ return std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(iterator), data);
+ } else {
+ iterator->MapChildren(mapper);
+ return iterator;
}
- }
- return section_mask;
-}
-
-SectionIdMask DocHitInfoIteratorSectionRestrict::
- ComputeAndCacheSchemaTypeAllowedSectionsMask(
- const std::string& schema_type) {
- if (const auto type_property_mask_itr =
- type_property_masks_.find(schema_type);
- type_property_mask_itr != type_property_masks_.end()) {
- return type_property_mask_itr->second;
- }
-
- // Section id mask of schema_type is never calculated before, so
- // calculate it here and put it into type_property_masks_.
- // - If type property filters of schema_type or wildcard (*) are
- // specified, then create a mask according to the filters.
- // - Otherwise, create a mask to match all properties.
- SectionIdMask new_section_id_mask = kSectionIdMaskAll;
- if (const auto itr = type_property_filters_.find(schema_type);
- itr != type_property_filters_.end()) {
- // Property filters defined for given schema type
- new_section_id_mask = GenerateSectionMask(
- schema_type, itr->second);
- } else if (const auto wildcard_itr = type_property_filters_.find(
- std::string(SchemaStore::kSchemaTypeWildcard));
- wildcard_itr != type_property_filters_.end()) {
- // Property filters defined for wildcard entry
- new_section_id_mask = GenerateSectionMask(
- schema_type, wildcard_itr->second);
- } else {
- // Do not cache the section mask if no property filters apply to this schema
- // type to avoid taking up unnecessary space.
- return kSectionIdMaskAll;
- }
-
- type_property_masks_[schema_type] = new_section_id_mask;
- return new_section_id_mask;
+ };
+ return mapper(std::move(iterator));
}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
while (delegate_->Advance().ok()) {
DocumentId document_id = delegate_->doc_hit_info().document_id();
- SectionIdMask section_id_mask =
- delegate_->doc_hit_info().hit_section_ids_mask();
-
- auto data_optional = document_store_.GetAliveDocumentFilterData(
- document_id, current_time_ms_);
+ auto data_optional = data_->document_store().GetAliveDocumentFilterData(
+ document_id, data_->current_time_ms());
if (!data_optional) {
// Ran into some error retrieving information on this hit, skip
continue;
@@ -154,34 +159,35 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
- auto schema_type_or = schema_store_.GetSchemaType(schema_type_id);
+ auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id);
if (!schema_type_or.ok()) {
// Ran into error retrieving schema type, skip
continue;
}
const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
SectionIdMask allowed_sections_mask =
- ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type);
+ data_->ComputeAllowedSectionsMask(*schema_type);
// A hit can be in multiple sections at once, need to check which of the
// section ids match the sections allowed by type_property_masks_. This can
// be done by doing a bitwise and of the section_id_mask in the doc hit and
// the allowed_sections_mask.
- hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask;
+ SectionIdMask section_id_mask =
+ delegate_->doc_hit_info().hit_section_ids_mask() &
+ allowed_sections_mask;
// Return this document if:
// - the sectionIdMask is not empty after applying property filters, or
// - no property filters apply for its schema type (allowed_sections_mask
// == kSectionIdMaskAll). This is needed to ensure that in case of empty
// query (which uses doc-hit-info-iterator-all-document-id), where
- // section_id_mask (and hence hit_intersect_section_ids_mask_) is
- // kSectionIdMaskNone, doc hits with no property restrictions don't get
- // filtered out. Doc hits for schema types for whom property filters are
- // specified will still get filtered out.
- if (allowed_sections_mask == kSectionIdMaskAll
- || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
+ // section_id_mask is kSectionIdMaskNone, doc hits with no property
+ // restrictions don't get filtered out. Doc hits for schema types for
+ // whom property filters are specified will still get filtered out.
+ if (allowed_sections_mask == kSectionIdMaskAll ||
+ section_id_mask != kSectionIdMaskNone) {
doc_hit_info_ = delegate_->doc_hit_info();
- doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+ doc_hit_info_.set_hit_section_ids_mask(section_id_mask);
return libtextclassifier3::Status::OK;
}
// Didn't find a matching section name for this hit. Continue.
@@ -200,14 +206,14 @@ DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
// type_property_filters_ when code flow reaches here. If the InternalError
// below is returned, that means TrimRightMostNode hasn't been called in the
// right context.
- const auto it = type_property_filters_.find("*");
- if (it == type_property_filters_.end()) {
+ const auto it = data_->type_property_filters().find("*");
+ if (it == data_->type_property_filters().end()) {
return absl_ports::InternalError(
"A wildcard entry should always be present in type property filters "
"whenever TrimRightMostNode() is called for "
"DocHitInfoIteratorSectionRestrict");
}
- std::set<std::string>& target_sections = it->second;
+ const std::set<std::string>& target_sections = it->second;
if (target_sections.empty()) {
return absl_ports::InternalError(
"Target sections should not be empty whenever TrimRightMostNode() is "
@@ -222,24 +228,14 @@ DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
trimmed_delegate.iterator_ =
std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
new DocHitInfoIteratorSectionRestrict(
- std::move(trimmed_delegate.iterator_), &document_store_,
- &schema_store_, std::move(type_property_filters_),
- std::move(type_property_masks_), current_time_ms_));
+ std::move(trimmed_delegate.iterator_), std::move(data_)));
return std::move(trimmed_delegate);
}
-int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
- return delegate_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
- return delegate_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
std::string output = "";
- for (auto it = type_property_filters_.cbegin();
- it != type_property_filters_.cend(); it++) {
+ for (auto it = data_->type_property_filters().cbegin();
+ it != data_->type_property_filters().cend(); it++) {
std::string paths = absl_ports::StrJoin(it->second, ",");
output += (it->first) + ":" + (paths) + "; ";
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index 5d44ed7..387ff52 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -17,15 +17,18 @@
#include <cstdint>
#include <memory>
+#include <set>
#include <string>
-#include <string_view>
-#include <unordered_map>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/iterator/section-restrict-data.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
-#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
namespace icing {
@@ -38,36 +41,48 @@ namespace lib {
// That class is meant to be applied to the root of a query tree and filter over
// all results at the end. This class is more used in the limited scope of a
// term or a small group of terms.
-class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
+class DocHitInfoIteratorSectionRestrict : public DocHitInfoLeafIterator {
public:
// Does not take any ownership, and all pointers must refer to valid objects
// that outlive the one constructed.
explicit DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
+ std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data);
+
+ // Methods that apply section restrictions to all DocHitInfoLeafIterator nodes
+ // inside the provided iterator tree, and return the root of the tree
+ // afterwards. These methods do not take any ownership for the raw pointer
+ // parameters, which must refer to valid objects that outlive the iterator
+ // returned.
+ static std::unique_ptr<DocHitInfoIterator> ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
std::set<std::string> target_sections, int64_t current_time_ms);
-
- explicit DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
+ static std::unique_ptr<DocHitInfoIterator> ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const SearchSpecProto& search_spec,
- int64_t current_time_ms);
+ const SearchSpecProto& search_spec, int64_t current_time_ms);
+ static std::unique_ptr<DocHitInfoIterator> ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data);
libtextclassifier3::Status Advance() override;
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
-
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
std::string ToString() const override;
- // Note that the DocHitInfoIteratorSectionRestrict is the only iterator that
- // should set filtering_section_mask, hence the received
- // filtering_section_mask is ignored and the filtering_section_mask passed to
- // the delegate will be set to hit_intersect_section_ids_mask_. This will
- // allow to filter the matching sections in the delegate.
+ // Note that the DocHitInfoIteratorSectionRestrict can only be applied at
+ // DocHitInfoLeafIterator, which can be a term iterator or another
+ // DocHitInfoIteratorSectionRestrict.
+ //
+ // To filter the matching sections, filtering_section_mask should be set to
+ // doc_hit_info_.hit_section_ids_mask() held in the outermost
+ // DocHitInfoIteratorSectionRestrict, which is equal to the intersection of
+ // all hit_section_ids_mask in the DocHitInfoIteratorSectionRestrict chain,
+ // since for any two section restrict iterators chained together, the outer
+ // one's hit_section_ids_mask is always a subset of the inner one's
+ // hit_section_ids_mask.
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
@@ -77,55 +92,14 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
}
delegate_->PopulateMatchedTermsStats(
matched_terms_stats,
- /*filtering_section_mask=*/hit_intersect_section_ids_mask_);
+ /*filtering_section_mask=*/filtering_section_mask &
+ doc_hit_info_.hit_section_ids_mask());
}
private:
- explicit DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
- const DocumentStore* document_store, const SchemaStore* schema_store,
- std::unordered_map<std::string, std::set<std::string>>
- type_property_filters,
- std::unordered_map<std::string, SectionIdMask> type_property_masks,
- int64_t current_time_ms);
- // Calculates the section mask of allowed sections(determined by the property
- // filters map) for the given schema type and caches the same for any future
- // calls.
- //
- // Returns:
- // - If type_property_filters_ has an entry for the given schema type or
- // wildcard(*), return a bitwise or of section IDs in the schema type that
- // that are also present in the relevant filter list.
- // - Otherwise, return kSectionIdMaskAll.
- SectionIdMask ComputeAndCacheSchemaTypeAllowedSectionsMask(
- const std::string& schema_type);
- // Generates a section mask for the given schema type and the target sections.
- //
- // Returns:
- // - A bitwise or of section IDs in the schema_type that that are also
- // present in the target_sections list.
- // - If none of the sections in the schema_type are present in the
- // target_sections list, return kSectionIdMaskNone.
- // This is done by doing a bitwise or of the target section ids for the given
- // schema type.
- SectionIdMask GenerateSectionMask(const std::string& schema_type,
- const std::set<std::string>&
- target_sections) const;
-
std::unique_ptr<DocHitInfoIterator> delegate_;
- const DocumentStore& document_store_;
- const SchemaStore& schema_store_;
- int64_t current_time_ms_;
-
- // Map of property filters per schema type. Supports wildcard(*) for schema
- // type that will apply to all schema types that are not specifically
- // specified in the mapping otherwise.
- std::unordered_map<std::string, std::set<std::string>>
- type_property_filters_;
- // Mapping of schema type to the section mask of allowed sections for that
- // schema type. This section mask is lazily calculated based on the specified
- // property filters and cached for any future use.
- std::unordered_map<std::string, SectionIdMask> type_property_masks_;
+ // Does not own.
+ SectionRestrictData* data_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 1500571..ee65fe1 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -15,6 +15,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include <memory>
+#include <set>
#include <string>
#include <utility>
#include <vector>
@@ -150,48 +151,50 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
auto original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
- original_iterator->set_hit_intersect_section_ids_mask(
- original_section_id_mask);
+ original_iterator->set_hit_section_ids_mask(original_section_id_mask);
// Filtering for the indexed section name (which has a section id of 0) should
// get a result.
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_0},
- fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
std::vector<TermMatchInfo> matched_terms_stats;
- section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
EXPECT_THAT(matched_terms_stats, IsEmpty());
- ICING_EXPECT_OK(section_restrict_iterator.Advance());
- EXPECT_THAT(section_restrict_iterator.doc_hit_info().document_id(),
+ ICING_EXPECT_OK(section_restrict_iterator->Advance());
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info().document_id(),
Eq(document_id));
SectionIdMask expected_section_id_mask = 0b00000001; // hits in sections 0
- EXPECT_EQ(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ EXPECT_EQ(section_restrict_iterator->doc_hit_info().hit_section_ids_mask(),
expected_section_id_mask);
- section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map = {{0, 1}};
EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
"hi", expected_section_ids_tf_map)));
- EXPECT_FALSE(section_restrict_iterator.Advance().ok());
+ EXPECT_FALSE(section_restrict_iterator->Advance().ok());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
- DocHitInfoIteratorSectionRestrict filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), /*target_sections=*/std::set<std::string>(),
- fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> filtered_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator_empty), document_store_.get(),
+ schema_store_.get(), /*target_sections=*/std::set<std::string>(),
+ fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(filtered_iterator.get()), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
- filtered_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ filtered_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
EXPECT_THAT(matched_terms_stats, IsEmpty());
}
@@ -210,12 +213,14 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filtering for the indexed section name should get a result
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_0},
- fake_clock_.GetSystemTimeMilliseconds());
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator),
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()),
ElementsAre(document_id));
}
@@ -236,18 +241,18 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filter for both target_sections
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_0, indexed_section_1},
- fake_clock_.GetSystemTimeMilliseconds());
-
- ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator->Advance());
std::vector<SectionId> expected_section_ids = {kIndexedSectionId0,
kIndexedSectionId1};
- EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info(),
EqualsDocHitInfo(document_id, expected_section_ids));
- EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
- Eq(section_id_mask));
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -267,17 +272,17 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filter for both target_sections
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_1},
- fake_clock_.GetSystemTimeMilliseconds());
-
- ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator->Advance());
std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
- EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info(),
EqualsDocHitInfo(document_id, expected_section_ids));
- EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
- Eq(1U << kIndexedSectionId1));
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -296,17 +301,17 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filter for both target_sections
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_0, indexed_section_1},
- fake_clock_.GetSystemTimeMilliseconds());
-
- ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator->Advance());
std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
- EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info(),
EqualsDocHitInfo(document_id, expected_section_ids));
- EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
- Eq(1U << kIndexedSectionId1));
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
@@ -317,13 +322,15 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filtering for the indexed section name should get a result
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
- section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
EXPECT_THAT(matched_terms_stats, IsEmpty());
}
@@ -343,14 +350,16 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filtering for the indexed section name should get a result
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{"some_section_name"},
- fake_clock_.GetSystemTimeMilliseconds());
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{"some_section_name"},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
- section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
EXPECT_THAT(matched_terms_stats, IsEmpty());
}
@@ -368,14 +377,16 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_0},
- fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
- section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
EXPECT_THAT(matched_terms_stats, IsEmpty());
}
@@ -396,37 +407,37 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{indexed_section_0},
- fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
- section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
EXPECT_THAT(matched_terms_stats, IsEmpty());
}
-TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
+TEST_F(DocHitInfoIteratorSectionRestrictTest, GetCallStats) {
+ DocHitInfoIterator::CallStats original_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumBlocksInspected(5);
+ original_iterator->SetCallStats(original_call_stats);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(section_restrict_iterator.GetNumBlocksInspected(), Eq(5));
-}
-
-TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumLeafAdvanceCalls) {
- auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumLeafAdvanceCalls(6);
-
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
-
- EXPECT_THAT(section_restrict_iterator.GetNumLeafAdvanceCalls(), Eq(6));
+ EXPECT_THAT(section_restrict_iterator->GetCallStats(),
+ Eq(original_call_stats));
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -444,12 +455,10 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Anything that's not 0, which is the indexed property
SectionId not_matching_section_id = 2;
- // Build an interator tree like:
- // Restrict
- // |
+ // Build an iterator tree like:
// AND
// / \
- // [1, 1],[2, 2] [3, 2]
+ // [1, 1],[2, 2] [3, 2]
std::vector<DocHitInfo> left_infos = {
DocHitInfo(document_id1, 1U << matching_section_id),
DocHitInfo(document_id2, 1U << not_matching_section_id)};
@@ -460,14 +469,21 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::make_unique<DocHitInfoIteratorDummy>(left_infos);
std::unique_ptr<DocHitInfoIterator> right_iterator =
std::make_unique<DocHitInfoIteratorDummy>(right_infos, "term", 10);
-
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iterator),
std::move(right_iterator));
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- {indexed_section_0}, fake_clock_.GetSystemTimeMilliseconds());
+ // After applying section restriction:
+ // AND
+ // / \
+ // Restrict Restrict
+ // | |
+ // [1, 1],[2, 2] [3, 2]
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(), {indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
// The trimmed tree.
// Restrict
@@ -475,12 +491,12 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// [1, 1],[2, 2]
ICING_ASSERT_OK_AND_ASSIGN(
DocHitInfoIterator::TrimmedNode node,
- std::move(section_restrict_iterator).TrimRightMostNode());
+ std::move(*section_restrict_iterator).TrimRightMostNode());
EXPECT_THAT(GetDocumentIds(node.iterator_.get()), ElementsAre(document_id1));
EXPECT_THAT(node.term_, Eq("term"));
EXPECT_THAT(node.term_start_index_, Eq(10));
- EXPECT_THAT(node.target_section_, Eq(""));
+ EXPECT_THAT(node.target_section_, Eq(indexed_section_0));
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) {
@@ -505,14 +521,16 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_infos, "term", 10);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- {indexed_section_0}, fake_clock_.GetSystemTimeMilliseconds());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(), {indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
// The trimmed tree has null iterator but has target section.
ICING_ASSERT_OK_AND_ASSIGN(
DocHitInfoIterator::TrimmedNode node,
- std::move(section_restrict_iterator).TrimRightMostNode());
+ std::move(*section_restrict_iterator).TrimRightMostNode());
EXPECT_THAT(node.iterator_, testing::IsNull());
EXPECT_THAT(node.term_, Eq("term"));
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index a77b91c..c75fb33 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -71,7 +71,7 @@ class DocHitInfoTermFrequencyPair {
// will then proceed to return the doc_hit_infos in order as Advance's are
// called. After all doc_hit_infos are returned, Advance will return a NotFound
// error (also like normal DocHitInfoIterators).
-class DocHitInfoIteratorDummy : public DocHitInfoIterator {
+class DocHitInfoIteratorDummy : public DocHitInfoLeafIterator {
public:
DocHitInfoIteratorDummy() = default;
explicit DocHitInfoIteratorDummy(
@@ -140,25 +140,14 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
matched_terms_stats->push_back(term_stats);
}
- void set_hit_intersect_section_ids_mask(
- SectionIdMask hit_intersect_section_ids_mask) {
- hit_intersect_section_ids_mask_ = hit_intersect_section_ids_mask;
+ void set_hit_section_ids_mask(SectionIdMask hit_section_ids_mask) {
+ doc_hit_info_.set_hit_section_ids_mask(hit_section_ids_mask);
}
- int32_t GetNumBlocksInspected() const override {
- return num_blocks_inspected_;
- }
-
- void SetNumBlocksInspected(int32_t num_blocks_inspected) {
- num_blocks_inspected_ = num_blocks_inspected;
- }
-
- int32_t GetNumLeafAdvanceCalls() const override {
- return num_leaf_advance_calls_;
- }
+ CallStats GetCallStats() const override { return call_stats_; }
- void SetNumLeafAdvanceCalls(int32_t num_leaf_advance_calls) {
- num_leaf_advance_calls_ = num_leaf_advance_calls;
+ void SetCallStats(CallStats call_stats) {
+ call_stats_ = std::move(call_stats);
}
std::string ToString() const override {
@@ -176,8 +165,7 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
private:
int32_t index_ = -1;
- int32_t num_blocks_inspected_ = 0;
- int32_t num_leaf_advance_calls_ = 0;
+ CallStats call_stats_;
std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos_;
std::string term_;
int term_start_index_;
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index d8cd3ad..728f957 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -17,8 +17,12 @@
#include <array>
#include <cstdint>
+#include <functional>
+#include <memory>
#include <string>
#include <string_view>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -52,8 +56,7 @@ struct TermMatchInfo {
// Iterator over DocHitInfos (collapsed Hits) in REVERSE document_id order.
//
-// NOTE: You must call Advance() before calling hit_info() or
-// hit_intersect_section_ids_mask().
+// NOTE: You must call Advance() before calling hit_info().
//
// Example:
// DocHitInfoIterator itr = GetIterator(...);
@@ -62,6 +65,112 @@ struct TermMatchInfo {
// }
class DocHitInfoIterator {
public:
+ using ChildrenMapper = std::function<std::unique_ptr<DocHitInfoIterator>(
+ std::unique_ptr<DocHitInfoIterator>)>;
+
+ // CallStats is a wrapper class of all stats to collect among all levels of
+ // the DocHitInfoIterator tree. Mostly the internal nodes will aggregate the
+ // number of all leaf nodes, while the leaf nodes will return the actual
+ // numbers.
+ struct CallStats {
+ // The number of times Advance() was called on the leaf node for term lite
+ // index.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorTermLite should maintain and set it correctly.
+ // - Others should set it 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_lite_index;
+
+ // The number of times Advance() was called on the leaf node for term main
+ // index.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorTermMain should maintain and set it correctly.
+ // - Others should set it 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_main_index;
+
+ // The number of times Advance() was called on the leaf node for integer
+ // index.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorNumeric should maintain and set it correctly.
+ // - Others should set it 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_integer_index;
+
+ // The number of times Advance() was called on the leaf node without reading
+ // any hits from index. Usually it is a special field for
+ // DocHitInfoIteratorAllDocumentId.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorAllDocumentId should maintain and set it correctly.
+ // - Others should set it 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_no_index;
+
+ // The number of flash index blocks that have been read as a result of
+ // operations on this object.
+ // - Leaf nodes: should maintain and set it correctly for all child classes
+ // involving flash index block access.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_blocks_inspected;
+
+ explicit CallStats()
+ : CallStats(/*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/0) {}
+
+ explicit CallStats(int32_t num_leaf_advance_calls_lite_index_in,
+ int32_t num_leaf_advance_calls_main_index_in,
+ int32_t num_leaf_advance_calls_integer_index_in,
+ int32_t num_leaf_advance_calls_no_index_in,
+ int32_t num_blocks_inspected_in)
+ : num_leaf_advance_calls_lite_index(
+ num_leaf_advance_calls_lite_index_in),
+ num_leaf_advance_calls_main_index(
+ num_leaf_advance_calls_main_index_in),
+ num_leaf_advance_calls_integer_index(
+ num_leaf_advance_calls_integer_index_in),
+ num_leaf_advance_calls_no_index(num_leaf_advance_calls_no_index_in),
+ num_blocks_inspected(num_blocks_inspected_in) {}
+
+ int32_t num_leaf_advance_calls() const {
+ return num_leaf_advance_calls_lite_index +
+ num_leaf_advance_calls_main_index +
+ num_leaf_advance_calls_integer_index +
+ num_leaf_advance_calls_no_index;
+ }
+
+ bool operator==(const CallStats& other) const {
+ return num_leaf_advance_calls_lite_index ==
+ other.num_leaf_advance_calls_lite_index &&
+ num_leaf_advance_calls_main_index ==
+ other.num_leaf_advance_calls_main_index &&
+ num_leaf_advance_calls_integer_index ==
+ other.num_leaf_advance_calls_integer_index &&
+ num_leaf_advance_calls_no_index ==
+ other.num_leaf_advance_calls_no_index &&
+ num_blocks_inspected == other.num_blocks_inspected;
+ }
+
+ CallStats operator+(const CallStats& other) const {
+ return CallStats(num_leaf_advance_calls_lite_index +
+ other.num_leaf_advance_calls_lite_index,
+ num_leaf_advance_calls_main_index +
+ other.num_leaf_advance_calls_main_index,
+ num_leaf_advance_calls_integer_index +
+ other.num_leaf_advance_calls_integer_index,
+ num_leaf_advance_calls_no_index +
+ other.num_leaf_advance_calls_no_index,
+ num_blocks_inspected + other.num_blocks_inspected);
+ }
+
+ CallStats& operator+=(const CallStats& other) {
+ *this = *this + other;
+ return *this;
+ }
+ };
+
struct TrimmedNode {
// the query results which we should only search for suggestion in these
// documents.
@@ -100,6 +209,11 @@ class DocHitInfoIterator {
// INVALID_ARGUMENT if the right-most node is not suppose to be trimmed.
virtual libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && = 0;
+ // Map all direct children of this iterator according to the passed mapper.
+ virtual void MapChildren(const ChildrenMapper& mapper) = 0;
+
+ virtual bool is_leaf() { return false; }
+
virtual ~DocHitInfoIterator() = default;
// Returns:
@@ -114,20 +228,8 @@ class DocHitInfoIterator {
// construction or if Advance returned an error.
const DocHitInfo& doc_hit_info() const { return doc_hit_info_; }
- // SectionIdMask representing which sections (if any) have matched *ALL* query
- // terms for the current document_id.
- SectionIdMask hit_intersect_section_ids_mask() const {
- return hit_intersect_section_ids_mask_;
- }
-
- // Gets the number of flash index blocks that have been read as a
- // result of operations on this object.
- virtual int32_t GetNumBlocksInspected() const = 0;
-
- // HitIterators may be constructed into trees. Internal nodes will return the
- // sum of the number of Advance() calls to all leaf nodes. Leaf nodes will
- // return the number of times Advance() was called on it.
- virtual int32_t GetNumLeafAdvanceCalls() const = 0;
+ // Returns CallStats of the DocHitInfoIterator tree.
+ virtual CallStats GetCallStats() const = 0;
// A string representing the iterator.
virtual std::string ToString() const = 0;
@@ -145,7 +247,6 @@ class DocHitInfoIterator {
protected:
DocHitInfo doc_hit_info_;
- SectionIdMask hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
// Helper function to advance the given iterator to at most the given
// document_id.
@@ -160,11 +261,20 @@ class DocHitInfoIterator {
// Didn't find anything for the other iterator, reset to invalid values and
// return.
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
-}; // namespace DocHitInfoIterator
+};
+
+// A leaf node is a term node or a chain of section restriction node applied on
+// a term node.
+class DocHitInfoLeafIterator : public DocHitInfoIterator {
+ public:
+ bool is_leaf() override { return true; }
+
+ // Calling MapChildren on leaf node does not make sense, and will do nothing.
+ void MapChildren(const ChildrenMapper& mapper) override {}
+};
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/section-restrict-data.cc b/icing/index/iterator/section-restrict-data.cc
new file mode 100644
index 0000000..085437d
--- /dev/null
+++ b/icing/index/iterator/section-restrict-data.cc
@@ -0,0 +1,82 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/section-restrict-data.h"
+
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+SectionIdMask SectionRestrictData::GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const {
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list = schema_store_.GetSectionMetadata(schema_type);
+ if (!section_metadata_list.ok()) {
+ // The current schema doesn't have section metadata.
+ return kSectionIdMaskNone;
+ }
+ for (const SectionMetadata& section_metadata :
+ *section_metadata_list.ValueOrDie()) {
+ if (target_sections.find(section_metadata.path) != target_sections.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ return section_mask;
+}
+
+SectionIdMask SectionRestrictData::ComputeAllowedSectionsMask(
+ const std::string& schema_type) {
+ if (const auto type_property_mask_itr =
+ type_property_masks_.find(schema_type);
+ type_property_mask_itr != type_property_masks_.end()) {
+ return type_property_mask_itr->second;
+ }
+
+ // Section id mask of schema_type is never calculated before, so
+ // calculate it here and put it into type_property_masks_.
+ // - If type property filters of schema_type or wildcard (*) are
+ // specified, then create a mask according to the filters.
+ // - Otherwise, create a mask to match all properties.
+ SectionIdMask new_section_id_mask = kSectionIdMaskAll;
+ if (const auto itr = type_property_filters_.find(schema_type);
+ itr != type_property_filters_.end()) {
+ // Property filters defined for given schema type
+ new_section_id_mask = GenerateSectionMask(schema_type, itr->second);
+ } else if (const auto wildcard_itr = type_property_filters_.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_itr != type_property_filters_.end()) {
+ // Property filters defined for wildcard entry
+ new_section_id_mask =
+ GenerateSectionMask(schema_type, wildcard_itr->second);
+ } else {
+ // Do not cache the section mask if no property filters apply to this schema
+ // type to avoid taking up unnecessary space.
+ return kSectionIdMaskAll;
+ }
+
+ type_property_masks_[schema_type] = new_section_id_mask;
+ return new_section_id_mask;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/section-restrict-data.h b/icing/index/iterator/section-restrict-data.h
new file mode 100644
index 0000000..26ca597
--- /dev/null
+++ b/icing/index/iterator/section-restrict-data.h
@@ -0,0 +1,98 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
+#define ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
+
+#include <cstdint>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+class SectionRestrictData {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed.
+ SectionRestrictData(const DocumentStore* document_store,
+ const SchemaStore* schema_store, int64_t current_time_ms,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters)
+ : document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms),
+ type_property_filters_(std::move(type_property_filters)) {}
+
+ // Calculates the section mask of allowed sections(determined by the
+ // property filters map) for the given schema type and caches the same for any
+ // future calls.
+ //
+ // Returns:
+ // - If type_property_filters_ has an entry for the given schema type or
+ // wildcard(*), return a bitwise or of section IDs in the schema type
+  //   that are also present in the relevant filter list.
+ // - Otherwise, return kSectionIdMaskAll.
+ SectionIdMask ComputeAllowedSectionsMask(const std::string& schema_type);
+
+ const DocumentStore& document_store() const { return document_store_; }
+
+ const SchemaStore& schema_store() const { return schema_store_; }
+
+ int64_t current_time_ms() const { return current_time_ms_; }
+
+ const std::unordered_map<std::string, std::set<std::string>>&
+ type_property_filters() const {
+ return type_property_filters_;
+ }
+
+ private:
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+ int64_t current_time_ms_;
+
+ // Map of property filters per schema type. Supports wildcard(*) for schema
+ // type that will apply to all schema types that are not specifically
+ // specified in the mapping otherwise.
+ std::unordered_map<std::string, std::set<std::string>> type_property_filters_;
+ // Mapping of schema type to the section mask of allowed sections for that
+ // schema type. This section mask is lazily calculated based on the
+ // specified property filters and cached for any future use.
+ std::unordered_map<std::string, SectionIdMask> type_property_masks_;
+
+ // Generates a section mask for the given schema type and the target
+ // sections.
+ //
+ // Returns:
+  //   - A bitwise or of section IDs in the schema_type that are also
+ // present in the target_sections list.
+ // - If none of the sections in the schema_type are present in the
+ // target_sections list, return kSectionIdMaskNone.
+ // This is done by doing a bitwise or of the target section ids for the
+ // given schema type.
+ SectionIdMask GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index acf3b33..21eecb6 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -65,12 +65,11 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
// Nothing more for the iterator to return. Set these members to invalid
// values.
doc_hit_info_ = DocHitInfo();
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
+ ++num_advance_calls_;
doc_hit_info_ = cached_hits_.at(cached_hits_idx_);
- hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
return libtextclassifier3::Status::OK;
}
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index 873ea89..7facd88 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -28,7 +28,7 @@
namespace icing {
namespace lib {
-class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
+class DocHitInfoIteratorTermLite : public DocHitInfoLeafIterator {
public:
explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
@@ -51,8 +51,14 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override { return 0; }
- int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
+ CallStats GetCallStats() const override {
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/num_advance_calls_,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/0);
+ }
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
diff --git a/icing/index/lite/lite-index-header.h b/icing/index/lite/lite-index-header.h
index 58379d6..75de8fa 100644
--- a/icing/index/lite/lite-index-header.h
+++ b/icing/index/lite/lite-index-header.h
@@ -15,6 +15,9 @@
#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
+#include <cstddef>
+#include <cstdint>
+
#include "icing/legacy/core/icing-string-util.h"
#include "icing/store/document-id.h"
@@ -50,7 +53,14 @@ class LiteIndex_Header {
class LiteIndex_HeaderImpl : public LiteIndex_Header {
public:
struct HeaderData {
- static const uint32_t kMagic = 0xb4fb8792;
+ static uint32_t GetCurrentMagic(
+ bool include_property_existence_metadata_hits) {
+ if (!include_property_existence_metadata_hits) {
+ return 0x01c61418;
+ } else {
+ return 0x56e07d5b;
+ }
+ }
uint32_t lite_index_crc;
uint32_t magic;
@@ -66,10 +76,15 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
uint32_t searchable_end;
};
- explicit LiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {}
+ explicit LiteIndex_HeaderImpl(HeaderData *hdr,
+ bool include_property_existence_metadata_hits)
+ : hdr_(hdr),
+ include_property_existence_metadata_hits_(
+ include_property_existence_metadata_hits) {}
bool check_magic() const override {
- return hdr_->magic == HeaderData::kMagic;
+ return hdr_->magic == HeaderData::GetCurrentMagic(
+ include_property_existence_metadata_hits_);
}
uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; }
@@ -96,7 +111,8 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
void Reset() override {
hdr_->lite_index_crc = 0;
- hdr_->magic = HeaderData::kMagic;
+ hdr_->magic =
+ HeaderData::GetCurrentMagic(include_property_existence_metadata_hits_);
hdr_->last_added_docid = kInvalidDocumentId;
hdr_->cur_size = 0;
hdr_->searchable_end = 0;
@@ -104,6 +120,7 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
private:
HeaderData *hdr_;
+ bool include_property_existence_metadata_hits_;
};
static_assert(24 == sizeof(LiteIndex_HeaderImpl::HeaderData),
"sizeof(HeaderData) != 24");
diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 8780d45..7e6c076 100644
--- a/icing/index/lite/lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -14,9 +14,13 @@
#include "icing/index/lite/lite-index-options.h"
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
+#include <string>
#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
namespace icing {
namespace lib {
@@ -65,14 +69,16 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
-LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes,
- bool hit_buffer_sort_at_indexing,
- uint32_t hit_buffer_sort_threshold_bytes)
+LiteIndexOptions::LiteIndexOptions(
+ const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing, uint32_t hit_buffer_sort_threshold_bytes,
+ bool include_property_existence_metadata_hits)
: filename_base(filename_base),
hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
- hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes),
+ include_property_existence_metadata_hits(
+ include_property_existence_metadata_hits) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h
index 9f8452c..8b03449 100644
--- a/icing/index/lite/lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -15,6 +15,9 @@
#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
+#include <cstdint>
+#include <string>
+
#include "icing/legacy/index/icing-dynamic-trie.h"
namespace icing {
@@ -29,7 +32,8 @@ struct LiteIndexOptions {
LiteIndexOptions(const std::string& filename_base,
uint32_t hit_buffer_want_merge_bytes,
bool hit_buffer_sort_at_indexing,
- uint32_t hit_buffer_sort_threshold_bytes);
+ uint32_t hit_buffer_sort_threshold_bytes,
+ bool include_property_existence_metadata_hits = false);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -39,6 +43,7 @@ struct LiteIndexOptions {
uint32_t hit_buffer_size = 0;
bool hit_buffer_sort_at_indexing = false;
uint32_t hit_buffer_sort_threshold_bytes = 0;
+ bool include_property_existence_metadata_hits = false;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index ec7141a..3f9cc93 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -168,7 +168,8 @@ libtextclassifier3::Status LiteIndex::Initialize() {
header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()));
+ header_mmap_.address()),
+ options_.include_property_existence_metadata_hits);
header_->Reset();
if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
@@ -183,7 +184,8 @@ libtextclassifier3::Status LiteIndex::Initialize() {
header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()));
+ header_mmap_.address()),
+ options_.include_property_existence_metadata_hits);
if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
sizeof(TermIdHitPair::Value), header_->cur_size(),
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index 5cf6a4c..3e66858 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -100,13 +100,12 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
// Nothing more for the iterator to return. Set these members to invalid
// values.
doc_hit_info_ = DocHitInfo();
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
+ ++num_advance_calls_;
doc_hit_info_ =
cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_).doc_hit_info;
- hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
return libtextclassifier3::Status::OK;
}
@@ -139,7 +138,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
posting_list_accessor_->GetNextHitsBatch());
if (hits.empty()) {
all_pages_consumed_ = true;
+ return libtextclassifier3::Status::OK;
}
+
++num_blocks_inspected_;
cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
@@ -177,7 +178,6 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
- ++num_blocks_inspected_;
if (posting_list_accessor_ == nullptr) {
ICING_ASSIGN_OR_RETURN(MainIndex::GetPrefixAccessorResult result,
main_index_->GetAccessorForPrefixTerm(term_));
@@ -188,7 +188,10 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
posting_list_accessor_->GetNextHitsBatch());
if (hits.empty()) {
all_pages_consumed_ = true;
+ return libtextclassifier3::Status::OK;
}
+
+ ++num_blocks_inspected_;
cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index 1987e12..e32db2a 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -33,7 +33,7 @@
namespace icing {
namespace lib {
-class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
+class DocHitInfoIteratorTermMain : public DocHitInfoLeafIterator {
public:
struct DocHitInfoAndTermFrequencyArray {
DocHitInfo doc_hit_info;
@@ -70,10 +70,14 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override {
- return num_blocks_inspected_;
+ CallStats GetCallStats() const override {
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/num_advance_calls_,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/num_blocks_inspected_);
}
- int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
diff --git a/icing/index/main/posting-list-hit-serializer.cc b/icing/index/main/posting-list-hit-serializer.cc
index 00c70e9..e14a0c0 100644
--- a/icing/index/main/posting-list-hit-serializer.cc
+++ b/icing/index/main/posting-list-hit-serializer.cc
@@ -300,7 +300,8 @@ libtextclassifier3::Status PostingListHitSerializer::PrependHitToNotFull(
// Therefore, offset must be less than kSpecialHitSize + 5. Since posting
// list size must be divisible by sizeof(Hit) (5), it is guaranteed that
// offset < size_in_bytes, so it is safe to ignore the return value here.
- ConsumeTermFrequencyIfPresent(posting_list_used, &cur, &offset);
+ ICING_RETURN_IF_ERROR(
+ ConsumeTermFrequencyIfPresent(posting_list_used, &cur, &offset));
// Safe to ignore the return value of PadToEnd because offset must be less
// than posting_list_used->size_in_bytes(). Otherwise, this function
// already would have returned FAILED_PRECONDITION.
@@ -419,7 +420,7 @@ libtextclassifier3::Status PostingListHitSerializer::PopFrontHits(
// previous hits in the posting list and because there's no way that the
// posting list could run out of room because it previously stored this hit
// AND another hit.
- PrependHit(posting_list_used, out[1]);
+ ICING_RETURN_IF_ERROR(PrependHit(posting_list_used, out[1]));
} else if (num_hits > 0) {
return GetHitsInternal(posting_list_used, /*limit=*/num_hits, /*pop=*/true,
nullptr);
diff --git a/icing/index/main/posting-list-hit-serializer.h b/icing/index/main/posting-list-hit-serializer.h
index 975b05a..2986d9c 100644
--- a/icing/index/main/posting-list-hit-serializer.h
+++ b/icing/index/main/posting-list-hit-serializer.h
@@ -23,6 +23,7 @@
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/hit/hit.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -72,8 +73,9 @@ class PostingListHitSerializer : public PostingListSerializer {
// keep_prepended is true, whatever could be prepended is kept, otherwise the
// posting list is left in its original state.
template <class T, Hit (*GetHit)(const T&)>
- uint32_t PrependHitArray(PostingListUsed* posting_list_used, const T* array,
- uint32_t num_hits, bool keep_prepended) const;
+ libtextclassifier3::StatusOr<uint32_t> PrependHitArray(
+ PostingListUsed* posting_list_used, const T* array, uint32_t num_hits,
+ bool keep_prepended) const;
// Retrieves the hits stored in the posting list.
//
@@ -312,9 +314,10 @@ class PostingListHitSerializer : public PostingListSerializer {
// Inlined functions. Implementation details below. Avert eyes!
template <class T, Hit (*GetHit)(const T&)>
-uint32_t PostingListHitSerializer::PrependHitArray(
- PostingListUsed* posting_list_used, const T* array, uint32_t num_hits,
- bool keep_prepended) const {
+libtextclassifier3::StatusOr<uint32_t>
+PostingListHitSerializer::PrependHitArray(PostingListUsed* posting_list_used,
+ const T* array, uint32_t num_hits,
+ bool keep_prepended) const {
if (!IsPostingListValid(posting_list_used)) {
return 0;
}
@@ -331,7 +334,7 @@ uint32_t PostingListHitSerializer::PrependHitArray(
// before. PopFrontHits guarantees that it will remove all 'i' hits so long
// as there are at least 'i' hits in the posting list, which we know there
// are.
- PopFrontHits(posting_list_used, /*num_hits=*/i);
+ ICING_RETURN_IF_ERROR(PopFrontHits(posting_list_used, /*num_hits=*/i));
}
return i;
}
diff --git a/icing/index/main/posting-list-hit-serializer_test.cc b/icing/index/main/posting-list-hit-serializer_test.cc
index ffd8166..7f0b945 100644
--- a/icing/index/main/posting-list-hit-serializer_test.cc
+++ b/icing/index/main/posting-list-hit-serializer_test.cc
@@ -59,14 +59,14 @@ TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
// Make used.
Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
- serializer.PrependHit(&pl_used, hit0);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0));
// Size = sizeof(uncompressed hit0)
int expected_size = sizeof(Hit);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
Hit hit1(/*section_id=*/0, 1, Hit::kDefaultTermFrequency);
- serializer.PrependHit(&pl_used, hit1);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1));
// Size = sizeof(uncompressed hit1)
// + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
expected_size += 2 + sizeof(Hit::TermFrequency);
@@ -75,7 +75,7 @@ TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
IsOkAndHolds(ElementsAre(hit1, hit0)));
Hit hit2(/*section_id=*/0, 2, /*term_frequency=*/56);
- serializer.PrependHit(&pl_used, hit2);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2));
// Size = sizeof(uncompressed hit2)
// + sizeof(hit1-hit2)
// + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
@@ -85,7 +85,7 @@ TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
Hit hit3(/*section_id=*/0, 3, Hit::kDefaultTermFrequency);
- serializer.PrependHit(&pl_used, hit3);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3));
// Size = sizeof(uncompressed hit3)
// + sizeof(hit2-hit3) + sizeof(hit2::term_frequency)
// + sizeof(hit1-hit2)
@@ -232,17 +232,19 @@ TEST(PostingListHitSerializerTest,
// Add five hits. The PL is in the empty state and an empty min size PL can
// only fit two hits. So PrependHitArray should fail.
- uint32_t num_can_prepend =
- serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hits_in[0], hits_in.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t num_can_prepend,
+ (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
EXPECT_THAT(num_can_prepend, Eq(2));
int can_fit_hits = num_can_prepend;
// The PL has room for 2 hits. We should be able to add them without any
// problem, transitioning the PL from EMPTY -> ALMOST_FULL -> FULL
const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2);
- num_can_prepend = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, hits_in_ptr, can_fit_hits, false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_can_prepend, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, hits_in_ptr, can_fit_hits, false)));
EXPECT_THAT(num_can_prepend, Eq(can_fit_hits));
EXPECT_THAT(size, Eq(serializer.GetBytesUsed(&pl_used)));
std::deque<Hit> hits_pushed;
@@ -289,8 +291,10 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
// Add five hits. The PL is in the empty state and should be able to fit all
// five hits without issue, transitioning the PL from EMPTY -> NOT_FULL.
- uint32_t num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hits_in[0], hits_in.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t num_could_fit,
+ (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
std::deque<Hit> hits_pushed;
@@ -334,8 +338,9 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
// Add these 6 hits. The PL is currently in the NOT_FULL state and should
// remain in the NOT_FULL state.
- num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hits_in[0], hits_in.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
// All hits from hits_in were added.
@@ -368,8 +373,9 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
// Add this 1 hit. The PL is currently in the NOT_FULL state and should
// transition to the ALMOST_FULL state - even though there is still some
// unused space.
- num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hits_in[0], hits_in.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
// All hits from hits_in were added.
@@ -408,8 +414,9 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
// second hit should tranisition to the FULL state because the delta between
// Hit #13 and Hit #14 (2 bytes) is larger than the remaining unused area
// (1 byte).
- num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hits_in[0], hits_in.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(size, Eq(serializer.GetBytesUsed(&pl_used)));
// All hits from hits_in were added.
@@ -442,8 +449,11 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
// PrependHitArray should fail because hit_elts_in_too_many is far too large
// for the minimum size pl.
- uint32_t num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t num_could_fit,
+ (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(),
+ false)));
ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
@@ -453,8 +463,10 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// PrependHitArray should fail because hit_elts_in_too_many is one hit too
// large for this pl.
- num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
- &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hit_elts_in_too_many[0],
+ hit_elts_in_too_many.size(), false)));
ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
@@ -476,7 +488,7 @@ TEST(PostingListHitSerializerTest,
ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue >> 2, 0)));
// Status should jump to full directly.
ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(pl_size));
- serializer.PopFrontHits(&pl, 1);
+ ICING_ASSERT_OK(serializer.PopFrontHits(&pl, 1));
// Status should return to not full as before.
ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(bytes_used));
}
diff --git a/icing/index/numeric/doc-hit-info-iterator-numeric.h b/icing/index/numeric/doc-hit-info-iterator-numeric.h
index fc66a1d..7cdb230 100644
--- a/icing/index/numeric/doc-hit-info-iterator-numeric.h
+++ b/icing/index/numeric/doc-hit-info-iterator-numeric.h
@@ -29,7 +29,7 @@ namespace icing {
namespace lib {
template <typename T>
-class DocHitInfoIteratorNumeric : public DocHitInfoIterator {
+class DocHitInfoIteratorNumeric : public DocHitInfoLeafIterator {
public:
explicit DocHitInfoIteratorNumeric(
std::unique_ptr<typename NumericIndex<T>::Iterator> numeric_index_iter)
@@ -53,9 +53,19 @@ class DocHitInfoIteratorNumeric : public DocHitInfoIterator {
"Cannot generate suggestion if the last term is numeric operator.");
}
- int32_t GetNumBlocksInspected() const override { return 0; }
+ CallStats GetCallStats() const override {
+ if (numeric_index_iter_ == nullptr) {
+ return CallStats();
+ }
- int32_t GetNumLeafAdvanceCalls() const override { return 0; }
+ return CallStats(/*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/
+ numeric_index_iter_->GetNumAdvanceCalls(),
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/
+ numeric_index_iter_->GetNumBlocksInspected());
+ }
std::string ToString() const override { return "test"; }
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index ce5fa45..d18f2aa 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -15,6 +15,7 @@
#ifndef ICING_INDEX_NUMERIC_DUMMY_NUMERIC_INDEX_H_
#define ICING_INDEX_NUMERIC_DUMMY_NUMERIC_INDEX_H_
+#include <cstdint>
#include <functional>
#include <map>
#include <memory>
@@ -166,7 +167,8 @@ class DummyNumericIndex : public NumericIndex<T> {
explicit Iterator(T key_lower, T key_upper,
std::vector<BucketInfo>&& bucket_info_vec)
: NumericIndex<T>::Iterator(key_lower, key_upper),
- pq_(std::less<BucketInfo>(), std::move(bucket_info_vec)) {}
+ pq_(std::less<BucketInfo>(), std::move(bucket_info_vec)),
+ num_advance_calls_(0) {}
~Iterator() override = default;
@@ -174,9 +176,15 @@ class DummyNumericIndex : public NumericIndex<T> {
DocHitInfo GetDocHitInfo() const override { return doc_hit_info_; }
+ int32_t GetNumAdvanceCalls() const override { return num_advance_calls_; }
+
+ int32_t GetNumBlocksInspected() const override { return 0; }
+
private:
std::priority_queue<BucketInfo> pq_;
DocHitInfo doc_hit_info_;
+
+ int32_t num_advance_calls_;
};
explicit DummyNumericIndex(const Filesystem& filesystem,
@@ -262,6 +270,7 @@ libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
// Merge sections with same document_id into a single DocHitInfo
while (!pq_.empty() &&
pq_.top().GetCurrentBasicHit().document_id() == document_id) {
+ ++num_advance_calls_;
doc_hit_info_.UpdateSection(pq_.top().GetCurrentBasicHit().section_id());
BucketInfo info = pq_.top();
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index f0212da..72e0266 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -152,18 +152,25 @@ class BucketPostingListIterator {
: pl_accessor_(std::move(pl_accessor)),
should_retrieve_next_batch_(true) {}
+ struct AdvanceAndFilterResult {
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ int32_t num_advance_calls = 0;
+ int32_t num_blocks_inspected = 0;
+ };
// Advances to the next relevant data. The posting list of a bucket contains
// keys within range [bucket.key_lower, bucket.key_upper], but some of them
// may be out of [query_key_lower, query_key_upper], so when advancing we have
// to filter out those non-relevant keys.
//
// Returns:
+ // AdvanceAndFilterResult. status will be:
// - OK on success
// - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
// data)
// - Any other PostingListIntegerIndexAccessor errors
- libtextclassifier3::Status AdvanceAndFilter(int64_t query_key_lower,
- int64_t query_key_upper) {
+ AdvanceAndFilterResult AdvanceAndFilter(int64_t query_key_lower,
+ int64_t query_key_upper) {
+ AdvanceAndFilterResult result;
// Move curr_ until reaching a relevant data (i.e. key in range
// [query_key_lower, query_key_upper])
do {
@@ -173,12 +180,18 @@ class BucketPostingListIterator {
curr_ >= cached_batch_integer_index_data_.cend();
}
if (should_retrieve_next_batch_) {
- ICING_RETURN_IF_ERROR(GetNextDataBatch());
+ auto status = GetNextDataBatch();
+ if (!status.ok()) {
+ result.status = std::move(status);
+ return result;
+ }
+ ++result.num_blocks_inspected;
should_retrieve_next_batch_ = false;
}
+ ++result.num_advance_calls;
} while (curr_->key() < query_key_lower || curr_->key() > query_key_upper);
- return libtextclassifier3::Status::OK;
+ return result;
}
const BasicHit& GetCurrentBasicHit() const { return curr_->basic_hit(); }
@@ -223,7 +236,9 @@ class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
explicit IntegerIndexStorageIterator(
int64_t query_key_lower, int64_t query_key_upper,
std::vector<std::unique_ptr<BucketPostingListIterator>>&& bucket_pl_iters)
- : NumericIndex<int64_t>::Iterator(query_key_lower, query_key_upper) {
+ : NumericIndex<int64_t>::Iterator(query_key_lower, query_key_upper),
+ num_advance_calls_(0),
+ num_blocks_inspected_(0) {
std::vector<BucketPostingListIterator*> bucket_pl_iters_raw_ptrs;
for (std::unique_ptr<BucketPostingListIterator>& bucket_pl_itr :
bucket_pl_iters) {
@@ -233,11 +248,15 @@ class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
// Note: it is possible that the bucket iterator fails to advance for the
// first round, because data could be filtered out by [query_key_lower,
// query_key_upper]. In this case, just discard the iterator.
- if (bucket_pl_itr->AdvanceAndFilter(query_key_lower, query_key_upper)
- .ok()) {
+ BucketPostingListIterator::AdvanceAndFilterResult
+ advance_and_filter_result =
+ bucket_pl_itr->AdvanceAndFilter(query_key_lower, query_key_upper);
+ if (advance_and_filter_result.status.ok()) {
bucket_pl_iters_raw_ptrs.push_back(bucket_pl_itr.get());
bucket_pl_iters_.push_back(std::move(bucket_pl_itr));
}
+ num_advance_calls_ += advance_and_filter_result.num_advance_calls;
+ num_blocks_inspected_ += advance_and_filter_result.num_blocks_inspected;
}
pq_ = std::priority_queue<BucketPostingListIterator*,
@@ -260,6 +279,12 @@ class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
DocHitInfo GetDocHitInfo() const override { return doc_hit_info_; }
+ int32_t GetNumAdvanceCalls() const override { return num_advance_calls_; }
+
+ int32_t GetNumBlocksInspected() const override {
+ return num_blocks_inspected_;
+ }
+
private:
BucketPostingListIterator::Comparator comparator_;
@@ -281,6 +306,9 @@ class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
pq_;
DocHitInfo doc_hit_info_;
+
+ int32_t num_advance_calls_;
+ int32_t num_blocks_inspected_;
};
libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
@@ -300,7 +328,12 @@ libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
do {
doc_hit_info_.UpdateSection(
bucket_itr->GetCurrentBasicHit().section_id());
- advance_status = bucket_itr->AdvanceAndFilter(key_lower_, key_upper_);
+ BucketPostingListIterator::AdvanceAndFilterResult
+ advance_and_filter_result =
+ bucket_itr->AdvanceAndFilter(key_lower_, key_upper_);
+ advance_status = std::move(advance_and_filter_result.status);
+ num_advance_calls_ += advance_and_filter_result.num_advance_calls;
+ num_blocks_inspected_ += advance_and_filter_result.num_blocks_inspected;
} while (advance_status.ok() &&
bucket_itr->GetCurrentBasicHit().document_id() == document_id);
if (advance_status.ok()) {
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
index 8675172..a632bc8 100644
--- a/icing/index/numeric/integer-index-storage_test.cc
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -57,6 +57,7 @@ using ::testing::IsFalse;
using ::testing::IsTrue;
using ::testing::Key;
using ::testing::Le;
+using ::testing::Lt;
using ::testing::Ne;
using ::testing::Not;
@@ -1428,6 +1429,130 @@ TEST_P(IntegerIndexStorageTest,
EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
}
+TEST_P(IntegerIndexStorageTest, IteratorCallStatsMultipleBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}));
+ ASSERT_THAT(storage->num_data(), Eq(5));
+
+ // GetIterator for range [INT_MIN, INT_MAX] and Advance all. Those 5 keys are
+ // in 2 buckets, so we will be inspecting 2 posting lists in 2 blocks.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter1,
+ storage->GetIterator(/*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ while (iter1->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(
+ iter1->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/5,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/2));
+
+ // GetIterator for range [-1000, -100] and Advance all. Since we only have to
+ // read bucket (-1000,-100), there will be 3 advance calls and 1 block
+ // inspected.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter2,
+ storage->GetIterator(/*key_lower=*/-1000, /*key_upper=*/-100));
+ while (iter2->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(
+ iter2->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/3,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+}
+
+TEST_P(IntegerIndexStorageTest, IteratorCallStatsSingleBucketChainedBlocks) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ int32_t num_keys_to_add = 800;
+ ASSERT_THAT(num_keys_to_add,
+ Lt(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit));
+ for (int i = 0; i < num_keys_to_add; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/i, kDefaultSectionId,
+ /*new_keys=*/{i}));
+ }
+
+ // Those 800 keys are in 1 single bucket with 3 chained posting lists, so we
+ // will be inspecting 3 blocks.
+ int32_t expected_num_blocks_inspected = 3;
+
+ // GetIterator for range [INT_MIN, INT_MAX] and Advance all.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter1,
+ storage->GetIterator(/*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ while (iter1->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(iter1->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/num_keys_to_add,
+ /*num_leaf_advance_calls_no_index=*/0,
+ expected_num_blocks_inspected));
+
+ // GetIterator for range [1, 1] and Advance all. Although there is only 1
+ // relevant data, we still have to inspect the entire bucket and its posting
+ // lists chain (which contain 3 blocks and 800 data).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter2,
+ storage->GetIterator(/*key_lower=*/1, /*key_upper=*/1));
+ while (iter2->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(iter2->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/num_keys_to_add,
+ /*num_leaf_advance_calls_no_index=*/0,
+ expected_num_blocks_inspected));
+}
+
TEST_P(IntegerIndexStorageTest, SplitBuckets) {
int32_t custom_num_data_threshold_for_bucket_split = 300;
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
index b2fe159..8c80698 100644
--- a/icing/index/numeric/integer-index.cc
+++ b/icing/index/numeric/integer-index.cc
@@ -236,7 +236,7 @@ IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
std::unique_ptr<DocHitInfoIterator> delegate,
wildcard_index_storage_->GetIterator(key_lower, key_upper));
std::set<std::string> property_paths = {std::move(property_path_str)};
- return std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ return DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::move(delegate), &document_store, &schema_store,
std::move(property_paths), current_time_ms);
}
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
index b2e3fbe..3b60001 100644
--- a/icing/index/numeric/integer-index_test.cc
+++ b/icing/index/numeric/integer-index_test.cc
@@ -14,6 +14,7 @@
#include "icing/index/numeric/integer-index.h"
+#include <cstdint>
#include <limits>
#include <memory>
#include <string>
@@ -140,7 +141,7 @@ class NumericIndexIntegerTest : public ::testing::Test {
return absl_ports::InternalError("Unable to create compact directory");
}
ICING_ASSIGN_OR_RETURN(
- std::vector<DocumentId> docid_map,
+ DocumentStore::OptimizeResult doc_store_optimize_result,
doc_store_->OptimizeInto(document_store_compact_dir, nullptr));
doc_store_.reset();
@@ -164,7 +165,7 @@ class NumericIndexIntegerTest : public ::testing::Test {
DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr));
doc_store_ = std::move(doc_store_create_result.document_store);
- return docid_map;
+ return std::move(doc_store_optimize_result.document_id_old_to_new);
}
libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
@@ -2444,6 +2445,138 @@ TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
/*document_id=*/7, expected_sections_typea))));
}
+TEST_P(IntegerIndexTest, IteratorCallStats) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+
+ // GetIterator for range [INT_MIN, INT_MAX] and Advance all. Those 4 keys are
+ // in 1 single bucket, so there will be only 1 posting list (and 1 block).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(
+ kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *doc_store_,
+ *schema_store_, clock_.GetSystemTimeMilliseconds()));
+
+  // 1 block should be read even without calling Advance(), since we read the
+  // posting list and put the bucket into the priority queue in the constructor.
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/1,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 1st Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/2,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 2nd Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/3,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 3rd Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/4,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 4th Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/4,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 5th Advance().
+ ASSERT_THAT(iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/4,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+}
+
+TEST_P(IntegerIndexTest, IteratorCallStatsNonExistingProperty) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+
+ // GetIterator for property "otherProperty1".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(
+ "otherProperty1", /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *doc_store_,
+ *schema_store_, clock_.GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/0));
+
+ // 1st Advance().
+ ASSERT_THAT(iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/0));
+}
+
INSTANTIATE_TEST_SUITE_P(
IntegerIndexTest, IntegerIndexTest,
testing::Values(
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
index 57911de..d094d3d 100644
--- a/icing/index/numeric/numeric-index.h
+++ b/icing/index/numeric/numeric-index.h
@@ -15,6 +15,7 @@
#ifndef ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
#define ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
+#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
@@ -100,6 +101,10 @@ class NumericIndex : public PersistentStorage {
virtual DocHitInfo GetDocHitInfo() const = 0;
+ virtual int32_t GetNumAdvanceCalls() const = 0;
+
+ virtual int32_t GetNumBlocksInspected() const = 0;
+
protected:
T key_lower_;
T key_upper_;
diff --git a/icing/index/numeric/posting-list-integer-index-serializer.cc b/icing/index/numeric/posting-list-integer-index-serializer.cc
index 6556451..99f14f9 100644
--- a/icing/index/numeric/posting-list-integer-index-serializer.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer.cc
@@ -222,7 +222,8 @@ libtextclassifier3::Status PostingListIntegerIndexSerializer::PrependData(
}
}
-uint32_t PostingListIntegerIndexSerializer::PrependDataArray(
+libtextclassifier3::StatusOr<uint32_t>
+PostingListIntegerIndexSerializer::PrependDataArray(
PostingListUsed* posting_list_used, const IntegerIndexData* array,
uint32_t num_data, bool keep_prepended) const {
if (!IsPostingListValid(posting_list_used)) {
@@ -240,7 +241,7 @@ uint32_t PostingListIntegerIndexSerializer::PrependDataArray(
// before. PopFrontData guarantees that it will remove all 'i' data so long
// as there are at least 'i' data in the posting list, which we know there
// are.
- PopFrontData(posting_list_used, /*num_data=*/i);
+ ICING_RETURN_IF_ERROR(PopFrontData(posting_list_used, /*num_data=*/i));
return 0;
}
return i;
@@ -335,7 +336,7 @@ libtextclassifier3::Status PostingListIntegerIndexSerializer::PopFrontData(
// - out[1] is a valid data less than all previous data in the posting list.
// - There's no way that the posting list could run out of room because it
// previously stored these 2 data.
- PrependData(posting_list_used, out[1]);
+ ICING_RETURN_IF_ERROR(PrependData(posting_list_used, out[1]));
} else if (num_data > 0) {
return GetDataInternal(posting_list_used, /*limit=*/num_data, /*pop=*/true,
/*out=*/nullptr);
diff --git a/icing/index/numeric/posting-list-integer-index-serializer.h b/icing/index/numeric/posting-list-integer-index-serializer.h
index ea2f2da..cbaed33 100644
--- a/icing/index/numeric/posting-list-integer-index-serializer.h
+++ b/icing/index/numeric/posting-list-integer-index-serializer.h
@@ -79,9 +79,9 @@ class PostingListIntegerIndexSerializer : public PostingListSerializer {
// RETURNS:
// The number of data that have been prepended to the posting list. If
// keep_prepended is false and reverted, then it returns 0.
- uint32_t PrependDataArray(PostingListUsed* posting_list_used,
- const IntegerIndexData* array, uint32_t num_data,
- bool keep_prepended) const;
+ libtextclassifier3::StatusOr<uint32_t> PrependDataArray(
+ PostingListUsed* posting_list_used, const IntegerIndexData* array,
+ uint32_t num_data, bool keep_prepended) const;
// Retrieves all data stored in the posting list.
//
diff --git a/icing/index/numeric/posting-list-integer-index-serializer_test.cc b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
index bfb4e71..716d1aa 100644
--- a/icing/index/numeric/posting-list-integer-index-serializer_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
@@ -241,7 +241,7 @@ TEST(PostingListIntegerIndexSerializerTest,
EXPECT_THAT(
serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
/*keep_prepended=*/false),
- Eq(data_in.size()));
+ IsOkAndHolds(data_in.size()));
std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
Eq(data_pushed.size() * sizeof(IntegerIndexData)));
@@ -258,7 +258,7 @@ TEST(PostingListIntegerIndexSerializerTest,
EXPECT_THAT(
serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
/*keep_prepended=*/false),
- Eq(data_in.size()));
+ IsOkAndHolds(data_in.size()));
std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
Eq(data_pushed.size() * sizeof(IntegerIndexData)));
@@ -276,7 +276,7 @@ TEST(PostingListIntegerIndexSerializerTest,
EXPECT_THAT(
serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
/*keep_prepended=*/false),
- Eq(0));
+ IsOkAndHolds(0));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
Eq(data_pushed.size() * sizeof(IntegerIndexData)));
EXPECT_THAT(
@@ -288,7 +288,7 @@ TEST(PostingListIntegerIndexSerializerTest,
EXPECT_THAT(
serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
/*keep_prepended=*/false),
- Eq(data_in.size()));
+ IsOkAndHolds(data_in.size()));
std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
Eq(data_pushed.size() * sizeof(IntegerIndexData)));
@@ -319,7 +319,7 @@ TEST(PostingListIntegerIndexSerializerTest, PrependDataArrayKeepPrepended) {
EXPECT_THAT(
serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
/*keep_prepended=*/true),
- Eq(data_in.size()));
+ IsOkAndHolds(data_in.size()));
std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
Eq(data_pushed.size() * sizeof(IntegerIndexData)));
@@ -341,7 +341,7 @@ TEST(PostingListIntegerIndexSerializerTest, PrependDataArrayKeepPrepended) {
EXPECT_THAT(
serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
/*keep_prepended=*/true),
- Eq(3));
+ IsOkAndHolds(3));
data_in.resize(3);
std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
@@ -365,7 +365,7 @@ TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
ASSERT_THAT(
serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
/*keep_prepended=*/false),
- Eq(data_arr1.size()));
+ IsOkAndHolds(data_arr1.size()));
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
@@ -378,7 +378,7 @@ TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
ASSERT_THAT(
serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
/*keep_prepended=*/false),
- Eq(data_arr2.size()));
+ IsOkAndHolds(data_arr2.size()));
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
IsOk());
@@ -402,7 +402,7 @@ TEST(PostingListIntegerIndexSerializerTest,
ASSERT_THAT(
serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
/*keep_prepended=*/false),
- Eq(data_arr.size()));
+ IsOkAndHolds(data_arr.size()));
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used, /*src=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
@@ -433,7 +433,7 @@ TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
ASSERT_THAT(
serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
/*keep_prepended=*/false),
- Eq(data_arr1.size()));
+ IsOkAndHolds(data_arr1.size()));
int size2 = serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
@@ -444,7 +444,7 @@ TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
ASSERT_THAT(
serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
/*keep_prepended=*/false),
- Eq(data_arr2.size()));
+ IsOkAndHolds(data_arr2.size()));
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
@@ -471,7 +471,7 @@ TEST(PostingListIntegerIndexSerializerTest, PopFrontData) {
ASSERT_THAT(
serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
/*keep_prepended=*/false),
- Eq(data_arr.size()));
+ IsOkAndHolds(data_arr.size()));
ASSERT_THAT(
serializer.GetData(&pl_used),
IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
diff --git a/icing/index/property-existence-indexing-handler.cc b/icing/index/property-existence-indexing-handler.cc
new file mode 100644
index 0000000..504f380
--- /dev/null
+++ b/icing/index/property-existence-indexing-handler.cc
@@ -0,0 +1,127 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/property-existence-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+void ConstructPropertyExistenceMetaToken(
+ const std::string& current_path, const DocumentProto& document,
+ std::unordered_set<std::string>& meta_tokens) {
+ for (const PropertyProto& property : document.properties()) {
+ std::string new_path = current_path;
+ if (!new_path.empty()) {
+ new_path.append(".");
+ }
+ new_path.append(property.name());
+ for (const DocumentProto& nested_document : property.document_values()) {
+ ConstructPropertyExistenceMetaToken(new_path, nested_document,
+ meta_tokens);
+ }
+ // A string property exists if and only if there is at least one non-empty
+ // string in the property.
+ bool has_string_value = false;
+ for (const std::string& string_value : property.string_values()) {
+ if (!string_value.empty()) {
+ has_string_value = true;
+ break;
+ }
+ }
+ if (has_string_value || property.int64_values_size() > 0 ||
+ property.double_values_size() > 0 ||
+ property.boolean_values_size() > 0 ||
+ property.bytes_values_size() > 0 ||
+ property.document_values_size() > 0) {
+ meta_tokens.insert(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, new_path));
+ }
+ }
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyExistenceIndexingHandler>>
+PropertyExistenceIndexingHandler::Create(const Clock* clock, Index* index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ return std::unique_ptr<PropertyExistenceIndexingHandler>(
+ new PropertyExistenceIndexingHandler(*clock, index));
+}
+
+libtextclassifier3::Status PropertyExistenceIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ libtextclassifier3::Status status;
+  // Section id is irrelevant to metadata tokens that are used to support
+  // property existence checks.
+ Index::Editor editor =
+ index_.Edit(document_id, /*section_id=*/0, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ std::unordered_set<std::string> meta_tokens;
+ ConstructPropertyExistenceMetaToken(
+ /*current_path=*/"", tokenized_document.document(), meta_tokens);
+ for (const std::string& meta_token : meta_tokens) {
+ status = editor.BufferTerm(meta_token.c_str());
+ if (!status.ok()) {
+ // We've encountered a failure. Bail out. We'll mark this doc as deleted
+ // and signal a failure to the client.
+ ICING_LOG(WARNING) << "Failed to buffer term in lite lexicon due to: "
+ << status.error_message();
+ break;
+ }
+ }
+
+ if (status.ok()) {
+ // Add all the metadata tokens to support property existence check.
+ status = editor.IndexAllBufferedTerms();
+ if (!status.ok()) {
+ ICING_LOG(WARNING) << "Failed to add hits in lite index due to: "
+ << status.error_message();
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_metadata_term_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ put_document_stats->mutable_tokenization_stats()
+ ->set_num_metadata_tokens_indexed(meta_tokens.size());
+ }
+
+ return status;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/property-existence-indexing-handler.h b/icing/index/property-existence-indexing-handler.h
new file mode 100644
index 0000000..55c0bb4
--- /dev/null
+++ b/icing/index/property-existence-indexing-handler.h
@@ -0,0 +1,86 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_PROPERTY_EXISTENCE_INDEXING_HANDLER_H_
+#define ICING_INDEX_PROPERTY_EXISTENCE_INDEXING_HANDLER_H_
+
+#include <memory>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+inline constexpr std::string_view kPropertyExistenceTokenPrefix =
+ "\xFF_HAS_\xFF";
+
+// This class is meant to be owned by TermIndexingHandler. Instead of using this
+// handler directly, callers should use TermIndexingHandler to index documents.
+//
+// This handler will not check or set last_added_document_id of the index, and
+// it will not merge or sort the lite index either.
+class PropertyExistenceIndexingHandler {
+ public:
+ // Creates a PropertyExistenceIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created PropertyExistenceIndexingHandler instance.
+ //
+ // Returns:
+ // - A PropertyExistenceIndexingHandler instance on success
+  //   - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyExistenceIndexingHandler>>
+ Create(const Clock* clock, Index* index);
+
+ ~PropertyExistenceIndexingHandler() = default;
+
+ // Handles the property existence indexing process: add hits for metadata
+ // tokens used to index property existence.
+ //
+ // For example, if the passed in document has string properties "propA",
+ // "propB" and "propC.propD", and document property "propC", this handler will
+ // add the following metadata token to the index.
+ // - kPropertyExistenceTokenPrefix + "propA"
+ // - kPropertyExistenceTokenPrefix + "propB"
+ // - kPropertyExistenceTokenPrefix + "propC"
+ // - kPropertyExistenceTokenPrefix + "propC.propD"
+ //
+  // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add anymore
+ // content.
+ // - INTERNAL_ERROR if any other errors occur.
+ libtextclassifier3::Status Handle(const TokenizedDocument& tokenized_document,
+ DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats);
+
+ private:
+ explicit PropertyExistenceIndexingHandler(const Clock& clock, Index* index)
+ : clock_(clock), index_(*index) {}
+
+ const Clock& clock_; // Does not own.
+ Index& index_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_PROPERTY_EXISTENCE_INDEXING_HANDLER_H_
diff --git a/icing/index/property-existence-indexing-handler_test.cc b/icing/index/property-existence-indexing-handler_test.cc
new file mode 100644
index 0000000..e42fbc3
--- /dev/null
+++ b/icing/index/property-existence-indexing-handler_test.cc
@@ -0,0 +1,524 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/property-existence-indexing-handler.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsTrue;
+using ::testing::Test;
+
+static constexpr std::string_view kTreeType = "TreeNode";
+static constexpr std::string_view kPropertyName = "name";
+static constexpr std::string_view kPropertyValue = "value";
+static constexpr std::string_view kPropertySubtrees = "subtrees";
+
+static constexpr std::string_view kValueType = "Value";
+static constexpr std::string_view kPropertyBody = "body";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+static constexpr std::string_view kPropertyScore = "score";
+
+class PropertyExistenceIndexingHandlerTest : public Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTreeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyValue)
+ .SetDataTypeDocument(
+ kValueType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertySubtrees)
+ .SetDataTypeDocument(
+ kTreeType, /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kValueType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyScore)
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string index_dir_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryExistence(Index* index, std::string_view property_path) {
+ return index->GetIterator(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, property_path),
+ /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY,
+ /*need_hit_term_frequency=*/false);
+}
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+TEST_F(PropertyExistenceIndexingHandlerTest, HandlePropertyExistence) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Create a document with every property.
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(std::string(kValueType))
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+ PropertyExistenceIndexingHandler::Create(&fake_clock_, index.get()));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Get all documents that have "body".
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ QueryExistence(index.get(), kPropertyBody));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id0, std::vector<SectionId>{0})));
+
+ // Get all documents that have "timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ QueryExistence(index.get(), kPropertyTimestamp));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id1, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id0, std::vector<SectionId>{0})));
+
+ // Get all documents that have "score".
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), kPropertyScore));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id1, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id0, std::vector<SectionId>{0})));
+}
+
+TEST_F(PropertyExistenceIndexingHandlerTest, HandleNestedPropertyExistence) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Create a complex nested root_document with the following property paths.
+ // - name
+ // - subtrees
+ // - subtrees.name
+ // - subtrees.value
+ // - subtrees.value.timestamp
+ // - subtrees.subtrees
+ // - subtrees.subtrees.name
+ // - subtrees.subtrees.value
+ // - subtrees.subtrees.value.body
+ // - subtrees.subtrees.value.score
+ DocumentProto leaf_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "leaf")
+ .AddDocumentProperty(
+ std::string(kPropertyValue),
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build())
+ .Build();
+ DocumentProto intermediate_document1 =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "intermediate1")
+ .AddDocumentProperty(
+ std::string(kPropertyValue),
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kValueType))
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build())
+ .AddDocumentProperty(std::string(kPropertySubtrees), leaf_document)
+ .Build();
+ DocumentProto intermediate_document2 =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "intermediate2")
+ .Build();
+ DocumentProto root_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "root")
+ .AddDocumentProperty(std::string(kPropertySubtrees),
+ intermediate_document1, intermediate_document2)
+ .Build();
+
+ // Handle root_document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_root_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(root_document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_root_document.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+ PropertyExistenceIndexingHandler::Create(&fake_clock_, index.get()));
+ EXPECT_THAT(handler->Handle(tokenized_root_document, document_id,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Check that the above property paths can be found by query.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ QueryExistence(index.get(), "name"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), "subtrees"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), "subtrees.name"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ QueryExistence(index.get(), "subtrees.value"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.value.timestamp"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ QueryExistence(index.get(), "subtrees.subtrees"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.name"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.value"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.value.body"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.value.score"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+}
+
+TEST_F(PropertyExistenceIndexingHandlerTest, SingleEmptyStringIsNonExisting) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Create a document with one empty body.
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "")
+ .Build();
+ // Create a document with two empty bodies.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "", "")
+ .Build();
+ // Create a document with one non-empty body.
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+ PropertyExistenceIndexingHandler::Create(&fake_clock_, index.get()));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Check that the documents that have one or two empty bodies will not be
+ // considered as having a body property.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ QueryExistence(index.get(), kPropertyBody));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{0})));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index f5e06ad..8b20d04 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -21,15 +21,12 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/index.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
-#include "icing/util/clock.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
@@ -39,35 +36,18 @@ namespace lib {
/* static */ libtextclassifier3::StatusOr<
std::unique_ptr<StringSectionIndexingHandler>>
-StringSectionIndexingHandler::Create(const Clock* clock,
- const Normalizer* normalizer,
+StringSectionIndexingHandler::Create(const Normalizer* normalizer,
Index* index) {
- ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(index);
return std::unique_ptr<StringSectionIndexingHandler>(
- new StringSectionIndexingHandler(clock, normalizer, index));
+ new StringSectionIndexingHandler(normalizer, index));
}
libtextclassifier3::Status StringSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
- std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
-
- if (index_.last_added_document_id() != kInvalidDocumentId &&
- document_id <= index_.last_added_document_id()) {
- if (recovery_mode) {
- // Skip the document if document_id <= last_added_document_id in recovery
- // mode without returning an error.
- return libtextclassifier3::Status::OK;
- }
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "DocumentId %d must be greater than last added document_id %d",
- document_id, index_.last_added_document_id()));
- }
- index_.set_last_added_document_id(document_id);
-
+ PutDocumentStatsProto* put_document_stats) {
uint32_t num_tokens = 0;
libtextclassifier3::Status status;
for (const TokenizedSection& section :
@@ -122,53 +102,11 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
}
}
- // Check and sort the LiteIndex HitBuffer if we're successful.
- if (status.ok() && index_.LiteIndexNeedSort()) {
- std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
- index_.SortLiteIndex();
-
- if (put_document_stats != nullptr) {
- put_document_stats->set_lite_index_sort_latency_ms(
- sort_timer->GetElapsedMilliseconds());
- }
- }
-
if (put_document_stats != nullptr) {
- put_document_stats->set_term_index_latency_ms(
- index_timer->GetElapsedMilliseconds());
put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
num_tokens);
}
- // If we're either successful or we've hit resource exhausted, then attempt a
- // merge.
- if ((status.ok() || absl_ports::IsResourceExhausted(status)) &&
- index_.WantsMerge()) {
- ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
-
- std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
- libtextclassifier3::Status merge_status = index_.Merge();
-
- if (!merge_status.ok()) {
- ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
- if (!index_.Reset().ok()) {
- return absl_ports::InternalError(IcingStringUtil::StringPrintf(
- "Unable to reset to clear index after merge failure. Merge "
- "failure=%d:%s",
- merge_status.error_code(), merge_status.error_message().c_str()));
- } else {
- return absl_ports::DataLossError(IcingStringUtil::StringPrintf(
- "Forced to reset index after merge failure. Merge failure=%d:%s",
- merge_status.error_code(), merge_status.error_message().c_str()));
- }
- }
-
- if (put_document_stats != nullptr) {
- put_document_stats->set_index_merge_latency_ms(
- merge_timer->GetElapsedMilliseconds());
- }
- }
-
return status;
}
diff --git a/icing/index/string-section-indexing-handler.h b/icing/index/string-section-indexing-handler.h
index 6abfba5..8452e9f 100644
--- a/icing/index/string-section-indexing-handler.h
+++ b/icing/index/string-section-indexing-handler.h
@@ -19,18 +19,21 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/index/data-indexing-handler.h"
#include "icing/index/index.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
-#include "icing/util/clock.h"
#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
-class StringSectionIndexingHandler : public DataIndexingHandler {
+// This class is meant to be owned by TermIndexingHandler. Instead of using this
+// handler directly, callers should use TermIndexingHandler to index documents.
+//
+// This handler will not check or set last_added_document_id of the index, and
+// it will not merge or sort the lite index either.
+class StringSectionIndexingHandler {
public:
// Creates a StringSectionIndexingHandler instance which does not take
// ownership of any input components. All pointers must refer to valid objects
@@ -41,9 +44,9 @@ class StringSectionIndexingHandler : public DataIndexingHandler {
// - FAILED_PRECONDITION_ERROR if any of the input pointer is null
static libtextclassifier3::StatusOr<
std::unique_ptr<StringSectionIndexingHandler>>
- Create(const Clock* clock, const Normalizer* normalizer, Index* index);
+ Create(const Normalizer* normalizer, Index* index);
- ~StringSectionIndexingHandler() override = default;
+ ~StringSectionIndexingHandler() = default;
// Handles the string term indexing process: add hits into the lite index for
// all contents in tokenized_document.tokenized_string_sections and merge lite
@@ -51,23 +54,18 @@ class StringSectionIndexingHandler : public DataIndexingHandler {
//
/// Returns:
// - OK on success
- // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
- // document_id of a previously indexed document in non recovery mode.
// - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add anymore
// content.
- // - DATA_LOSS_ERROR if an attempt to merge the index fails and both indices
- // are cleared as a result.
// - INTERNAL_ERROR if any other errors occur.
// - Any main/lite index errors.
- libtextclassifier3::Status Handle(
- const TokenizedDocument& tokenized_document, DocumentId document_id,
- bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+ libtextclassifier3::Status Handle(const TokenizedDocument& tokenized_document,
+ DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats);
private:
- explicit StringSectionIndexingHandler(const Clock* clock,
- const Normalizer* normalizer,
+ explicit StringSectionIndexingHandler(const Normalizer* normalizer,
Index* index)
- : DataIndexingHandler(clock), normalizer_(*normalizer), index_(*index) {}
+ : normalizer_(*normalizer), index_(*index) {}
const Normalizer& normalizer_; // Does not own.
Index& index_; // Does not own.
diff --git a/icing/index/term-indexing-handler.cc b/icing/index/term-indexing-handler.cc
new file mode 100644
index 0000000..7eb9dda
--- /dev/null
+++ b/icing/index/term-indexing-handler.cc
@@ -0,0 +1,146 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/term-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/index.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/index/string-section-indexing-handler.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<TermIndexingHandler>>
+TermIndexingHandler::Create(const Clock* clock, const Normalizer* normalizer,
+ Index* index,
+ bool build_property_existence_metadata_hits) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ // Property existence index handler
+ std::unique_ptr<PropertyExistenceIndexingHandler>
+ property_existence_indexing_handler = nullptr;
+ if (build_property_existence_metadata_hits) {
+ ICING_ASSIGN_OR_RETURN(
+ property_existence_indexing_handler,
+ PropertyExistenceIndexingHandler::Create(clock, index));
+ }
+ // String section index handler
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(normalizer, index));
+
+ return std::unique_ptr<TermIndexingHandler>(new TermIndexingHandler(
+ clock, index, std::move(property_existence_indexing_handler),
+ std::move(string_section_indexing_handler)));
+}
+
+libtextclassifier3::Status TermIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ if (index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, index_.last_added_document_id()));
+ }
+ index_.set_last_added_document_id(document_id);
+
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ if (property_existence_indexing_handler_ != nullptr) {
+ status = property_existence_indexing_handler_->Handle(
+ tokenized_document, document_id, put_document_stats);
+ }
+ if (status.ok()) {
+ status = string_section_indexing_handler_->Handle(
+ tokenized_document, document_id, put_document_stats);
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_term_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
+ // Check if we should merge when we're either successful or we've hit resource
+ // exhausted.
+ bool should_merge =
+ (status.ok() || absl_ports::IsResourceExhausted(status)) &&
+ index_.WantsMerge();
+
+ // Check and sort the LiteIndex HitBuffer if we don't need to merge.
+ if (!should_merge && index_.LiteIndexNeedSort()) {
+ std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
+ index_.SortLiteIndex();
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_lite_index_sort_latency_ms(
+ sort_timer->GetElapsedMilliseconds());
+ }
+ }
+
+ // Attempt index merge if needed.
+ if (should_merge) {
+ ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
+
+ std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
+ libtextclassifier3::Status merge_status = index_.Merge();
+
+ if (!merge_status.ok()) {
+ ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
+ if (!index_.Reset().ok()) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to reset to clear index after merge failure. Merge "
+ "failure=%d:%s",
+ merge_status.error_code(), merge_status.error_message().c_str()));
+ } else {
+ return absl_ports::DataLossError(IcingStringUtil::StringPrintf(
+ "Forced to reset index after merge failure. Merge failure=%d:%s",
+ merge_status.error_code(), merge_status.error_message().c_str()));
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_index_merge_latency_ms(
+ merge_timer->GetElapsedMilliseconds());
+ }
+ }
+ return status;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/term-indexing-handler.h b/icing/index/term-indexing-handler.h
new file mode 100644
index 0000000..c055bbf
--- /dev/null
+++ b/icing/index/term-indexing-handler.h
@@ -0,0 +1,97 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_TERM_INDEXING_HANDLER_H_
+#define ICING_INDEX_TERM_INDEXING_HANDLER_H_
+
+#include <memory>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/index/index.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/index/string-section-indexing-handler.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+class TermIndexingHandler : public DataIndexingHandler {
+ public:
+ // Creates a TermIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created TermIndexingHandler instance.
+ //
+ // Returns:
+ // - A TermIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointer is null
+ static libtextclassifier3::StatusOr<std::unique_ptr<TermIndexingHandler>>
+ Create(const Clock* clock, const Normalizer* normalizer, Index* index,
+ bool build_property_existence_metadata_hits);
+
+ ~TermIndexingHandler() override = default;
+
+ // Handles term indexing process:
+ // - Checks if document_id > last_added_document_id.
+ // - Updates last_added_document_id to document_id.
+ // - Handles PropertyExistenceIndexingHandler.
+ // - Handles StringSectionIndexingHandler.
+ // - Sorts the lite index if necessary.
+ // - Merges the lite index into the main index if necessary.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+ // document_id of a previously indexed document in non recovery mode.
+ // - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add any more
+ // content.
+ // - DATA_LOSS_ERROR if an attempt to merge the index fails and both indices
+ // are cleared as a result.
+ // - INTERNAL_ERROR if any other errors occur.
+ // - Any main/lite index errors.
+ libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+
+ private:
+ explicit TermIndexingHandler(const Clock* clock, Index* index,
+ std::unique_ptr<PropertyExistenceIndexingHandler>
+ property_existence_indexing_handler,
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler)
+ : DataIndexingHandler(clock),
+ index_(*index),
+ property_existence_indexing_handler_(
+ std::move(property_existence_indexing_handler)),
+ string_section_indexing_handler_(
+ std::move(string_section_indexing_handler)) {}
+
+ Index& index_; // Does not own.
+
+ std::unique_ptr<PropertyExistenceIndexingHandler>
+ property_existence_indexing_handler_; // Nullable
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_TERM_INDEXING_HANDLER_H_
diff --git a/icing/index/string-section-indexing-handler_test.cc b/icing/index/term-indexing-handler_test.cc
index 2c7f5e3..1b03865 100644
--- a/icing/index/string-section-indexing-handler_test.cc
+++ b/icing/index/term-indexing-handler_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/term-indexing-handler.h"
#include <cstdint>
#include <limits>
@@ -24,8 +24,10 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/portable-file-backed-proto-log.h"
@@ -34,6 +36,7 @@
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/property-existence-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
@@ -93,7 +96,7 @@ constexpr std::string_view kPropertySubject = "subject";
constexpr SectionId kSectionIdNestedBody = 1;
-class StringSectionIndexingHandlerTest : public Test {
+class TermIndexingHandlerTest : public Test {
protected:
void SetUp() override {
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
@@ -205,6 +208,16 @@ class StringSectionIndexingHandlerTest : public Test {
std::unique_ptr<DocumentStore> document_store_;
};
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryExistence(Index* index, std::string_view property_path) {
+ return index->GetIterator(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, property_path),
+ /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY,
+ /*need_hit_term_frequency=*/false);
+}
+
std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
std::vector<DocHitInfo> infos;
while (iterator->Advance().ok()) {
@@ -227,7 +240,70 @@ std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
return infos;
}
-TEST_F(StringSectionIndexingHandlerTest,
+TEST_F(TermIndexingHandlerTest, HandleBothStringSectionAndPropertyExistence) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo")
+ .AddStringProperty(std::string(kPropertyBody), "")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id));
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kSectionIdTitle, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ document_id, expected_map)));
+
+ // Query for "title" property existence.
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), kPropertyTitle));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ // Query for "body" property existence.
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), kPropertyBody));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(TermIndexingHandlerTest,
HandleIntoLiteIndex_sortInIndexingNotTriggered) {
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
/*lite_index_sort_at_indexing=*/true,
@@ -256,9 +332,10 @@ TEST_F(StringSectionIndexingHandlerTest,
EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler> handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index.get()));
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
EXPECT_THAT(
handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
/*put_document_stats=*/nullptr),
@@ -287,8 +364,7 @@ TEST_F(StringSectionIndexingHandlerTest,
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
-TEST_F(StringSectionIndexingHandlerTest,
- HandleIntoLiteIndex_sortInIndexingTriggered) {
+TEST_F(TermIndexingHandlerTest, HandleIntoLiteIndex_sortInIndexingTriggered) {
// Create the LiteIndex with a smaller sort threshold. At 64 bytes we sort the
// HitBuffer after inserting 8 hits
Index::Options options(index_dir_,
@@ -348,9 +424,10 @@ TEST_F(StringSectionIndexingHandlerTest,
EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler> handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index.get()));
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
// Handle doc0 and doc1. The LiteIndex should sort and merge after adding
// these
@@ -429,8 +506,7 @@ TEST_F(StringSectionIndexingHandlerTest,
"foo", expected_section_ids_tf_map0)));
}
-TEST_F(StringSectionIndexingHandlerTest,
- HandleIntoLiteIndex_enableSortInIndexing) {
+TEST_F(TermIndexingHandlerTest, HandleIntoLiteIndex_enableSortInIndexing) {
// Create the LiteIndex with a smaller sort threshold. At 64 bytes we sort the
// HitBuffer after inserting 8 hits
Index::Options options(index_dir_,
@@ -490,9 +566,10 @@ TEST_F(StringSectionIndexingHandlerTest,
EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler> handler,
- StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
- index.get()));
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
// Handle all docs
EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
diff --git a/icing/join/document-id-to-join-info.h b/icing/join/document-id-to-join-info.h
new file mode 100644
index 0000000..dee4885
--- /dev/null
+++ b/icing/join/document-id-to-join-info.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
+#define ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
+
+#include <utility>
+
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// DocumentIdToJoinInfo is composed of document_id and its join info.
+// - QualifiedId join: join info is the referenced document's namespace_id +
+// fingerprint(uri).
+// - String join: join info is the term id.
+// - Integer join: join info is the integer.
+//
+// DocumentIdToJoinInfo will be stored in posting list.
+template <typename JoinInfoType>
+class DocumentIdToJoinInfo {
+ public:
+  // Returns a sentinel instance: kInvalidDocumentId paired with a
+  // default-constructed JoinInfoType. Detect it via is_valid().
+  static DocumentIdToJoinInfo<JoinInfoType> GetInvalid() {
+    return DocumentIdToJoinInfo<JoinInfoType>(kInvalidDocumentId,
+                                              JoinInfoType());
+  }
+
+  explicit DocumentIdToJoinInfo(DocumentId document_id, JoinInfoType join_info)
+      : document_id_(document_id), join_info_(std::move(join_info)) {}
+
+  DocumentId document_id() const { return document_id_; }
+  // NOTE(review): returns a reference into a packed object, so the reference
+  // may be under-aligned if JoinInfoType has alignment requirements — confirm
+  // every JoinInfoType instantiation is itself packed/byte-aligned.
+  const JoinInfoType& join_info() const { return join_info_; }
+
+  // True iff document_id_ is a valid document id, i.e. this is not the
+  // sentinel produced by GetInvalid().
+  bool is_valid() const { return IsDocumentIdValid(document_id_); }
+
+  // Total order: primarily by document_id_, ties broken by join_info_.
+  bool operator<(const DocumentIdToJoinInfo<JoinInfoType>& other) const {
+    if (document_id_ != other.document_id_) {
+      return document_id_ < other.document_id_;
+    }
+    return join_info_ < other.join_info_;
+  }
+
+  bool operator==(const DocumentIdToJoinInfo<JoinInfoType>& other) const {
+    return document_id_ == other.document_id_ && join_info_ == other.join_info_;
+  }
+
+ private:
+  DocumentId document_id_;
+  JoinInfoType join_info_;
+  // Packed because instances are stored raw in posting lists (see class
+  // comment above): no padding may be inserted between members.
+} __attribute__((packed));
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
index e27b1ea..1b7ca0d 100644
--- a/icing/join/join-processor.cc
+++ b/icing/join/join-processor.cc
@@ -29,6 +29,7 @@
#include "icing/join/aggregation-scorer.h"
#include "icing/join/doc-join-info.h"
#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
@@ -37,6 +38,7 @@
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -53,17 +55,121 @@ JoinProcessor::GetChildrenFetcher(
"Parent property expression must be ", kQualifiedIdExpr));
}
- std::sort(
- child_scored_document_hits.begin(), child_scored_document_hits.end(),
- ScoredDocumentHitComparator(
- /*is_descending=*/join_spec.nested_spec().scoring_spec().order_by() ==
- ScoringSpecProto::Order::DESC));
-
- // TODO(b/256022027):
- // - Optimization
- // - Cache property to speed up property retrieval.
- // - If there is no cache, then we still have the flexibility to fetch it
- // from actual docs via DocumentStore.
+ ScoredDocumentHitComparator score_comparator(
+ /*is_descending=*/join_spec.nested_spec().scoring_spec().order_by() ==
+ ScoringSpecProto::Order::DESC);
+
+ if (qualified_id_join_index_->is_v2()) {
+ // v2
+ // Step 1a: sort child ScoredDocumentHits in document id descending order.
+ std::sort(child_scored_document_hits.begin(),
+ child_scored_document_hits.end(),
+ [](const ScoredDocumentHit& lhs, const ScoredDocumentHit& rhs) {
+ return lhs.document_id() > rhs.document_id();
+ });
+
+ // Step 1b: group all child ScoredDocumentHits by the document's
+ // schema_type_id.
+ std::unordered_map<SchemaTypeId, std::vector<ScoredDocumentHit>>
+ schema_to_child_scored_doc_hits_map;
+ for (const ScoredDocumentHit& child_scored_document_hit :
+ child_scored_document_hits) {
+ std::optional<DocumentFilterData> child_doc_filter_data =
+ doc_store_->GetAliveDocumentFilterData(
+ child_scored_document_hit.document_id(), current_time_ms_);
+ if (!child_doc_filter_data) {
+ continue;
+ }
+
+ schema_to_child_scored_doc_hits_map[child_doc_filter_data
+ ->schema_type_id()]
+ .push_back(child_scored_document_hit);
+ }
+
+ // Step 1c: for each schema_type_id, lookup QualifiedIdJoinIndexImplV2 to
+ // fetch all child join data from posting list(s). Convert all
+ // child join data to referenced parent document ids and bucketize
+ // child ScoredDocumentHits by it.
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ parent_to_child_docs_map;
+ for (auto& [schema_type_id, grouped_child_scored_doc_hits] :
+ schema_to_child_scored_doc_hits_map) {
+ // Get joinable_property_id of this schema.
+ ICING_ASSIGN_OR_RETURN(
+ const JoinablePropertyMetadata* metadata,
+ schema_store_->GetJoinablePropertyMetadata(
+ schema_type_id, join_spec.child_property_expression()));
+ if (metadata == nullptr ||
+ metadata->value_type != JoinableConfig::ValueType::QUALIFIED_ID) {
+ // Currently we only support qualified id, so skip other types.
+ continue;
+ }
+
+ // Lookup QualifiedIdJoinIndexImplV2.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase>
+ join_index_iter,
+ qualified_id_join_index_->GetIterator(
+ schema_type_id, /*joinable_property_id=*/metadata->id));
+
+ // - Join index contains all join data of schema_type_id and
+ // join_index_iter will return all of them in (child) document id
+ // descending order.
+ // - But we only need join data of child document ids which appear in
+ // grouped_child_scored_doc_hits. Also grouped_child_scored_doc_hits
+ // contain ScoredDocumentHits in (child) document id descending order.
+ // - Therefore, we advance 2 iterators to intersect them and get desired
+ // join data.
+ auto child_scored_doc_hits_iter = grouped_child_scored_doc_hits.cbegin();
+ while (join_index_iter->Advance().ok() &&
+ child_scored_doc_hits_iter !=
+ grouped_child_scored_doc_hits.cend()) {
+ // Advance child_scored_doc_hits_iter until it points to a
+ // ScoredDocumentHit with document id <= the one pointed by
+ // join_index_iter.
+ while (child_scored_doc_hits_iter !=
+ grouped_child_scored_doc_hits.cend() &&
+ child_scored_doc_hits_iter->document_id() >
+ join_index_iter->GetCurrent().document_id()) {
+ ++child_scored_doc_hits_iter;
+ }
+
+ if (child_scored_doc_hits_iter !=
+ grouped_child_scored_doc_hits.cend() &&
+ child_scored_doc_hits_iter->document_id() ==
+ join_index_iter->GetCurrent().document_id()) {
+ // We get a join data whose child document id exists in both join
+ // index and grouped_child_scored_doc_hits. Convert its join info to
+ // referenced parent document ids and bucketize ScoredDocumentHits by
+ // it (putting into parent_to_child_docs_map).
+ const NamespaceFingerprintIdentifier& ref_ns_id =
+ join_index_iter->GetCurrent().join_info();
+ libtextclassifier3::StatusOr<DocumentId> ref_parent_doc_id_or =
+ doc_store_->GetDocumentId(ref_ns_id);
+ if (ref_parent_doc_id_or.ok()) {
+ parent_to_child_docs_map[std::move(ref_parent_doc_id_or)
+ .ValueOrDie()]
+ .push_back(*child_scored_doc_hits_iter);
+ }
+ }
+ }
+ }
+
+ // Step 1d: finally, sort each parent's joined child ScoredDocumentHits by
+ // score.
+ for (auto& [parent_doc_id, bucketized_child_scored_hits] :
+ parent_to_child_docs_map) {
+ std::sort(bucketized_child_scored_hits.begin(),
+ bucketized_child_scored_hits.end(), score_comparator);
+ }
+
+ return JoinChildrenFetcher(join_spec, std::move(parent_to_child_docs_map));
+ }
+
+ // v1
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ std::sort(child_scored_document_hits.begin(),
+ child_scored_document_hits.end(), score_comparator);
// Step 1: group child documents by parent documentId. Currently we only
// support QualifiedId joining, so fetch the qualified id content of
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index f503442..a40d934 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -22,9 +22,13 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/portable/platform.h"
@@ -58,6 +62,9 @@ namespace {
using ::testing::ElementsAre;
using ::testing::IsTrue;
+// TODO(b/275121148): remove template after deprecating
+// QualifiedIdJoinIndexImplV1.
+template <typename T>
class JoinProcessorTest : public ::testing::Test {
protected:
void SetUp() override {
@@ -108,6 +115,25 @@ class JoinProcessorTest : public ::testing::Test {
.SetDataTypeJoinableString(
JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiver")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
@@ -121,18 +147,15 @@ class JoinProcessorTest : public ::testing::Test {
DocumentStore::Create(
&filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*namespace_id_fingerprint=*/true, /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr));
doc_store_ = std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(
- qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex<T>());
}
void TearDown() override {
@@ -143,6 +166,28 @@ class JoinProcessorTest : public ::testing::Test {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+  // Creates the QualifiedIdJoinIndex implementation selected by the test's
+  // type parameter. The primary template rejects unknown types with
+  // InvalidArgumentError; the specializations below construct V1 and V2.
+  // NOTE(review): explicit specialization at class scope ("template <>"
+  // inside the class) is a portability concern — historically rejected by
+  // some compilers per [temp.expl.spec]; confirm all target toolchains
+  // accept it, or move the specializations to namespace scope.
+  template <typename UnknownJoinIndexType>
+  libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+  CreateQualifiedIdJoinIndex() {
+    return absl_ports::InvalidArgumentError("Unknown type");
+  }
+
+  template <>
+  libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+  CreateQualifiedIdJoinIndex<QualifiedIdJoinIndexImplV1>() {
+    return QualifiedIdJoinIndexImplV1::Create(
+        filesystem_, qualified_id_join_index_dir_, /*pre_mapping_fbv=*/false,
+        /*use_persistent_hash_map=*/false);
+  }
+
+  template <>
+  libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+  CreateQualifiedIdJoinIndex<QualifiedIdJoinIndexImplV2>() {
+    return QualifiedIdJoinIndexImplV2::Create(filesystem_,
+                                              qualified_id_join_index_dir_,
+                                              /*pre_mapping_fbv=*/false);
+  }
+
libtextclassifier3::StatusOr<DocumentId> PutAndIndexDocument(
const DocumentProto& document) {
ICING_ASSIGN_OR_RETURN(DocumentId document_id, doc_store_->Put(document));
@@ -153,7 +198,7 @@ class JoinProcessorTest : public ::testing::Test {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
/*recovery_mode=*/false,
@@ -163,8 +208,8 @@ class JoinProcessorTest : public ::testing::Test {
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
const JoinSpecProto& join_spec,
- std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
- std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+ std::vector<ScoredDocumentHit> parent_scored_document_hits,
+ std::vector<ScoredDocumentHit> child_scored_document_hits) {
JoinProcessor join_processor(
doc_store_.get(), schema_store_.get(), qualified_id_join_index_.get(),
/*current_time_ms=*/fake_clock_.GetSystemTimeMilliseconds());
@@ -191,7 +236,11 @@ class JoinProcessorTest : public ::testing::Test {
FakeClock fake_clock_;
};
-TEST_F(JoinProcessorTest, JoinByQualifiedId) {
+using TestTypes =
+ ::testing::Types<QualifiedIdJoinIndexImplV1, QualifiedIdJoinIndexImplV2>;
+TYPED_TEST_SUITE(JoinProcessorTest, TestTypes);
+
+TYPED_TEST(JoinProcessorTest, JoinByQualifiedId_allDocuments) {
DocumentProto person1 = DocumentBuilder()
.SetKey("pkg$db/namespace", "person1")
.SetSchema("Person")
@@ -227,15 +276,15 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(person1));
+ this->PutAndIndexDocument(person1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- PutAndIndexDocument(person2));
+ this->PutAndIndexDocument(person2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- PutAndIndexDocument(email1));
+ this->PutAndIndexDocument(email1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
- PutAndIndexDocument(email2));
+ this->PutAndIndexDocument(email2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
- PutAndIndexDocument(email3));
+ this->PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -267,8 +316,8 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
EXPECT_THAT(
joined_result_document_hits,
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
@@ -282,7 +331,112 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
{scored_doc_hit5, scored_doc_hit3}))));
}
-TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
+// Indexes three Person and four Email documents but passes only a subset of
+// them (person1, person3, email2, email4) as scored hits. The join must use
+// only the supplied hits: person3 joins no children, and email2 is dropped
+// because its sender (person2) is not among the parent hits.
+TYPED_TEST(JoinProcessorTest, JoinByQualifiedId_partialDocuments) {
+  DocumentProto person1 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person1")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Alice")
+                              .Build();
+  DocumentProto person2 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person2")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Bob")
+                              .Build();
+  DocumentProto person3 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person3")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Eve")
+                              .Build();
+
+  DocumentProto email1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 1")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  DocumentProto email2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 2")
+          .AddStringProperty("sender", "pkg$db/namespace#person2")
+          .Build();
+  DocumentProto email3 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email3")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 3")
+          .AddStringProperty("sender", "pkg$db/namespace#person3")
+          .Build();
+  DocumentProto email4 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email4")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 4")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+
+  // All seven documents are put and indexed, but only some document ids are
+  // bound — the unused ones never appear in the scored-hit inputs below.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             this->PutAndIndexDocument(person1));
+  ICING_ASSERT_OK(/*document_id2 unused*/
+                  this->PutAndIndexDocument(person2));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             this->PutAndIndexDocument(person3));
+  ICING_ASSERT_OK(/*document_id4 unused*/
+                  this->PutAndIndexDocument(email1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+                             this->PutAndIndexDocument(email2));
+  ICING_ASSERT_OK(/*document_id6 unused*/
+                  this->PutAndIndexDocument(email3));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id7,
+                             this->PutAndIndexDocument(email4));
+
+  ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+                                    /*score=*/4.0);
+  ScoredDocumentHit scored_doc_hit7(document_id7, kSectionIdMaskNone,
+                                    /*score=*/5.0);
+
+  // Only join person1, person3, email2 and email4.
+  // Parent ScoredDocumentHits: person1, person3
+  std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+      scored_doc_hit3, scored_doc_hit1};
+
+  // Child ScoredDocumentHits: email2, email4
+  std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit7,
+                                                               scored_doc_hit5};
+
+  JoinSpecProto join_spec;
+  join_spec.set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  join_spec.set_child_property_expression("sender");
+  join_spec.set_aggregation_scoring_strategy(
+      JoinSpecProto::AggregationScoringStrategy::COUNT);
+  join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+      ScoringSpecProto::Order::DESC);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
+  // email2 (scored_doc_hit5) references person2, which is absent from the
+  // parent hits, so it joins nothing; person1 joins only email4. With COUNT
+  // aggregation the final scores are 0.0 (person3) and 1.0 (person1).
+  EXPECT_THAT(
+      joined_result_document_hits,
+      ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/0.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit3,
+                      /*child_scored_document_hits=*/{})),
+                  EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/1.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit1,
+                      /*child_scored_document_hits=*/{scored_doc_hit7}))));
+}
+
+TYPED_TEST(JoinProcessorTest,
+ ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
DocumentProto person1 = DocumentBuilder()
.SetKey("pkg$db/namespace", "person1")
.SetSchema("Person")
@@ -303,11 +457,11 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(person1));
+ this->PutAndIndexDocument(person1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- PutAndIndexDocument(email1));
+ this->PutAndIndexDocument(email1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- PutAndIndexDocument(email2));
+ this->PutAndIndexDocument(email2));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -335,8 +489,8 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Since Email2 doesn't have "sender" property, it should be ignored.
EXPECT_THAT(
joined_result_document_hits,
@@ -345,7 +499,8 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
/*child_scored_document_hits=*/{scored_doc_hit2}))));
}
-TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
+TYPED_TEST(JoinProcessorTest,
+ ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
DocumentProto person1 = DocumentBuilder()
.SetKey("pkg$db/namespace", "person1")
.SetSchema("Person")
@@ -379,13 +534,13 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(person1));
+ this->PutAndIndexDocument(person1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- PutAndIndexDocument(email1));
+ this->PutAndIndexDocument(email1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- PutAndIndexDocument(email2));
+ this->PutAndIndexDocument(email2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
- PutAndIndexDocument(email3));
+ this->PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -415,8 +570,8 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Email 2 and email 3 (document id 3 and 4) contain invalid qualified ids.
// Join processor should ignore them.
EXPECT_THAT(joined_result_document_hits,
@@ -426,7 +581,7 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
/*child_scored_document_hits=*/{scored_doc_hit2}))));
}
-TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
+TYPED_TEST(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
DocumentProto person1 = DocumentBuilder()
.SetKey("pkg$db/namespace", "person1")
.SetSchema("Person")
@@ -448,11 +603,11 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(person1));
+ this->PutAndIndexDocument(person1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- PutAndIndexDocument(person2));
+ this->PutAndIndexDocument(person2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- PutAndIndexDocument(email1));
+ this->PutAndIndexDocument(email1));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -479,8 +634,8 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Person1 has no child documents, but left join should also include it.
EXPECT_THAT(
joined_result_document_hits,
@@ -494,7 +649,7 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
/*child_scored_document_hits=*/{}))));
}
-TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
+TYPED_TEST(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
DocumentProto person1 = DocumentBuilder()
.SetKey("pkg$db/namespace", "person1")
.SetSchema("Person")
@@ -524,13 +679,13 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(person1));
+ this->PutAndIndexDocument(person1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- PutAndIndexDocument(email1));
+ this->PutAndIndexDocument(email1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- PutAndIndexDocument(email2));
+ this->PutAndIndexDocument(email2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
- PutAndIndexDocument(email3));
+ this->PutAndIndexDocument(email3));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -560,8 +715,8 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Child documents should be sorted according to the (nested) ranking
// strategy.
EXPECT_THAT(
@@ -572,7 +727,7 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
{scored_doc_hit3, scored_doc_hit4, scored_doc_hit2}))));
}
-TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
+TYPED_TEST(JoinProcessorTest, ShouldAllowSelfJoining) {
DocumentProto email1 =
DocumentBuilder()
.SetKey("pkg$db/namespace", "email1")
@@ -582,7 +737,7 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(email1));
+ this->PutAndIndexDocument(email1));
ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
/*score=*/0.0);
@@ -605,8 +760,8 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
EXPECT_THAT(joined_result_document_hits,
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
/*final_score=*/1.0,
@@ -614,6 +769,156 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
/*child_scored_document_hits=*/{scored_doc_hit1}))));
}
+// Joins one parent set (Person documents) against child documents of two
+// different schema types (Email and Message). The same hit vectors are joined
+// twice: first on the "sender" property, then on "receiver" — only Message
+// documents have a "receiver" joinable property.
+TYPED_TEST(JoinProcessorTest, MultipleChildSchemasJoining) {
+  DocumentProto person1 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person1")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Alice")
+                              .Build();
+  DocumentProto person2 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person2")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Bob")
+                              .Build();
+
+  DocumentProto email1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 1")
+          .AddStringProperty("sender", "pkg$db/namespace#person2")
+          .Build();
+  DocumentProto email2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 2")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  DocumentProto email3 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email3")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 3")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  DocumentProto message1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "message1")
+          .SetSchema("Message")
+          .AddStringProperty("content", "test content 1")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .AddStringProperty("receiver", "pkg$db/namespace#person2")
+          .Build();
+  DocumentProto message2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "message2")
+          .SetSchema("Message")
+          .AddStringProperty("content", "test content 2")
+          .AddStringProperty("sender", "pkg$db/namespace#person2")
+          .AddStringProperty("receiver", "pkg$db/namespace#person1")
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             this->PutAndIndexDocument(person1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             this->PutAndIndexDocument(person2));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             this->PutAndIndexDocument(email1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+                             this->PutAndIndexDocument(email2));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+                             this->PutAndIndexDocument(email3));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+                             this->PutAndIndexDocument(message1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id7,
+                             this->PutAndIndexDocument(message2));
+
+  ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+                                    /*score=*/5.0);
+  ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
+                                    /*score=*/3.0);
+  ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+                                    /*score=*/2.0);
+  ScoredDocumentHit scored_doc_hit6(document_id6, kSectionIdMaskNone,
+                                    /*score=*/4.0);
+  ScoredDocumentHit scored_doc_hit7(document_id7, kSectionIdMaskNone,
+                                    /*score=*/1.0);
+
+  // Parent ScoredDocumentHits: all Person documents
+  std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+      scored_doc_hit1, scored_doc_hit2};
+
+  // Child ScoredDocumentHits: all Email and Message documents
+  std::vector<ScoredDocumentHit> child_scored_document_hits = {
+      scored_doc_hit3, scored_doc_hit4, scored_doc_hit5, scored_doc_hit6,
+      scored_doc_hit7};
+
+  // Join by "sender".
+  // - Person1: [
+  //     email2 (scored_doc_hit4),
+  //     email3 (scored_doc_hit5),
+  //     message1 (scored_doc_hit6),
+  //   ]
+  // - Person2: [
+  //     email1 (scored_doc_hit3),
+  //     message2 (scored_doc_hit7),
+  //   ]
+  JoinSpecProto join_spec;
+  join_spec.set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  join_spec.set_child_property_expression("sender");
+  join_spec.set_aggregation_scoring_strategy(
+      JoinSpecProto::AggregationScoringStrategy::COUNT);
+  join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+      ScoringSpecProto::Order::DESC);
+
+  // Note: the hit vectors are passed by copy here (not moved) because they
+  // are reused for the second join below.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<JoinedScoredDocumentHit> joined_result_document_hits1,
+      this->Join(join_spec, parent_scored_document_hits,
+                 child_scored_document_hits));
+  // Children are ordered by score DESC; final scores are child COUNTs.
+  EXPECT_THAT(
+      joined_result_document_hits1,
+      ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/3.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit1,
+                      /*child_scored_document_hits=*/
+                      {scored_doc_hit6, scored_doc_hit4, scored_doc_hit5})),
+                  EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/2.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit2,
+                      /*child_scored_document_hits=*/
+                      {scored_doc_hit3, scored_doc_hit7}))));
+
+  // Join by "receiver".
+  // - Person1: [
+  //     message2 (scored_doc_hit7),
+  //   ]
+  // - Person2: [
+  //     message1 (scored_doc_hit6),
+  //   ]
+  join_spec.set_child_property_expression("receiver");
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<JoinedScoredDocumentHit> joined_result_document_hits2,
+      this->Join(join_spec, parent_scored_document_hits,
+                 child_scored_document_hits));
+  EXPECT_THAT(
+      joined_result_document_hits2,
+      ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/1.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit1,
+                      /*child_scored_document_hits=*/{scored_doc_hit7})),
+                  EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/1.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit2,
+                      /*child_scored_document_hits=*/{scored_doc_hit6}))));
+}
+
// TODO(b/256022027): add unit tests for non-joinable property. If joinable
// value type is unset, then qualified id join should not
// include the child document even if it contains a valid
diff --git a/icing/join/posting-list-join-data-accessor.h b/icing/join/posting-list-join-data-accessor.h
new file mode 100644
index 0000000..6669f9f
--- /dev/null
+++ b/icing/join/posting-list-join-data-accessor.h
@@ -0,0 +1,211 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
+#define ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// This class is used to provide a simple abstraction for adding join data to
+// posting lists. PostingListJoinDataAccessor handles:
+// 1) selection of properly-sized posting lists for the accumulated join index
+// data during Finalize()
+// 2) chaining of max-sized posting lists.
+template <typename JoinDataType>
+class PostingListJoinDataAccessor : public PostingListAccessor {
+ public:
+ // Creates an empty PostingListJoinDataAccessor.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListJoinDataAccessor
+ // - INVALID_ARGUMENT error if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+ Create(FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer);
+
+ // Creates a PostingListJoinDataAccessor with an existing posting list
+ // identified by existing_posting_list_id.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListJoinDataAccessor
+ // - INVALID_ARGUMENT if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+ CreateFromExisting(FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer,
+ PostingListIdentifier existing_posting_list_id);
+
+ PostingListSerializer* GetSerializer() override { return serializer_; }
+
+ // Retrieves the next batch of data in the posting list chain.
+ //
+ // RETURNS:
+ // - On success, a vector of join data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetNextDataBatch();
+
+ // Prepends one data. This may result in flushing the posting list to disk (if
+ // the PostingListJoinDataAccessor holds a max-sized posting list that is
+ // full) or freeing a pre-existing posting list if it is too small to fit all
+ // data necessary.
+ //
+ // RETURNS:
+ // - OK, on success
+//   - INVALID_ARGUMENT if !data.is_valid() or if data is not greater than the
+//     previously added data.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status PrependData(const JoinDataType& data);
+
+ private:
+ explicit PostingListJoinDataAccessor(
+ FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
+ PostingListJoinDataSerializer<JoinDataType>* serializer)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
+ serializer_(serializer) {}
+
+ PostingListJoinDataSerializer<JoinDataType>* serializer_; // Does not own.
+};
+
+template <typename JoinDataType>
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+PostingListJoinDataAccessor<JoinDataType>::Create(
+ FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer) {
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ storage->block_size(), serializer->GetDataTypeBytes());
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
+ return std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>(
+ new PostingListJoinDataAccessor<JoinDataType>(
+ storage, std::move(in_memory_posting_list), serializer));
+}
+
+template <typename JoinDataType>
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer,
+ PostingListIdentifier existing_posting_list_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ Create(storage, serializer));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage->GetPostingList(existing_posting_list_id));
+ pl_accessor->preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ return pl_accessor;
+}
+
+// Returns the next batch of join data for the provided posting list.
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<std::vector<JoinDataType>>
+PostingListJoinDataAccessor<JoinDataType>::GetNextDataBatch() {
+ if (preexisting_posting_list_ == nullptr) {
+ if (has_reached_posting_list_chain_end_) {
+ return std::vector<JoinDataType>();
+ }
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve data from a PostingListJoinDataAccessor that was not "
+ "created from a preexisting posting list.");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<JoinDataType> batch,
+ serializer_->GetData(&preexisting_posting_list_->posting_list));
+ uint32_t next_block_index = kInvalidBlockIndex;
+ // Posting lists will only be chained when they are max-sized, in which case
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
+ }
+
+ if (next_block_index != kInvalidBlockIndex) {
+ // Since we only have to deal with next block for max-sized posting list
+ // block, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
+ PostingListIdentifier next_posting_list_id(
+ next_block_index, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/BitsToStore(1));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage_->GetPostingList(next_posting_list_id));
+ preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ } else {
+ has_reached_posting_list_chain_end_ = true;
+ preexisting_posting_list_.reset();
+ }
+ return batch;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataAccessor<JoinDataType>::PrependData(
+ const JoinDataType& data) {
+ PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
+ ? preexisting_posting_list_->posting_list
+ : in_memory_posting_list_;
+ libtextclassifier3::Status status =
+ serializer_->PrependData(&active_pl, data);
+ if (!absl_ports::IsResourceExhausted(status)) {
+ return status;
+ }
+ // There is no more room to add data to this current posting list! Therefore,
+ // we need to either move those data to a larger posting list or flush this
+ // posting list and create another max-sized posting list in the chain.
+ if (preexisting_posting_list_ != nullptr) {
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+ } else {
+ ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+ }
+
+ // Re-add data. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependData(&in_memory_posting_list_, data);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
diff --git a/icing/join/posting-list-join-data-accessor_test.cc b/icing/join/posting-list-join-data-accessor_test.cc
new file mode 100644
index 0000000..ddc2d32
--- /dev/null
+++ b/icing/join/posting-list-join-data-accessor_test.cc
@@ -0,0 +1,435 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/posting-list-join-data-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+using JoinDataType = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+static constexpr NamespaceId kDefaultNamespaceId = 1;
+
+class PostingListJoinDataAccessorTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/test_dir";
+ file_name_ = test_dir_ + "/test_file.idx.index";
+
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+ serializer_ =
+ std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+ }
+
+ void TearDown() override {
+ flash_index_storage_.reset();
+ serializer_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string file_name_;
+ std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>> serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+};
+
+std::vector<JoinDataType> CreateData(int num_data, DocumentId start_document_id,
+ NamespaceId ref_namespace_id,
+ uint64_t start_ref_hash_uri) {
+ std::vector<JoinDataType> data;
+ data.reserve(num_data);
+ for (int i = 0; i < num_data; ++i) {
+ data.push_back(JoinDataType(
+ start_document_id,
+ NamespaceFingerprintIdentifier(ref_namespace_id,
+ /*fingerprint=*/start_ref_hash_uri)));
+
+ ++start_document_id;
+ ++start_ref_hash_uri;
+ }
+ return data;
+}
+
+TEST_F(PostingListJoinDataAccessorTest, DataAddAndRetrieveProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Add some join data
+ std::vector<JoinDataType> data_vec =
+ CreateData(/*num_data=*/5, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId,
+ /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec) {
+ EXPECT_THAT(pl_accessor->PrependData(data), IsOk());
+ }
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result.status, IsOk());
+ EXPECT_THAT(result.id.block_index(), Eq(1));
+ EXPECT_THAT(result.id.posting_list_index(), Eq(0));
+
+ // Retrieve some data.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result.id));
+ EXPECT_THAT(
+ serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, PreexistingPLKeepOnSameBlock) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Add a single data. This will fit in a min-sized posting list.
+ JoinDataType data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/123));
+ ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ // Should be allocated to the first block.
+ ASSERT_THAT(result1.id.block_index(), Eq(1));
+ ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Add one more data. The minimum size for a posting list must be able to fit
+ // two data, so this should NOT cause the previous pl to be reallocated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ JoinDataType data2(
+ /*document_id=*/2,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/456));
+ ICING_ASSERT_OK(pl_accessor->PrependData(data2));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ // Should be in the same posting list.
+ EXPECT_THAT(result2.id, Eq(result1.id));
+
+ // The posting list at result2.id should hold all of the data that have been
+ // added.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAre(data2, data1)));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, PreexistingPLReallocateToLargerPL) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Adding 3 data should cause Finalize allocating a 56-byte posting list,
+ // which can store at most 4 data.
+ std::vector<JoinDataType> data_vec1 =
+ CreateData(/*num_data=*/3, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId,
+ /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec1) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ // Should be allocated to the first block.
+ ASSERT_THAT(result1.id.block_index(), Eq(1));
+ ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Now add more data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ // The current posting list can fit 1 more data. Adding 12 more data should
+ // result in these data being moved to a larger posting list. Also the total
+ // size of these data won't exceed max size posting list, so there will be
+ // only one single posting list and no chain.
+ std::vector<JoinDataType> data_vec2 = CreateData(
+ /*num_data=*/12, /*start_document_id=*/data_vec1.back().document_id() + 1,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+
+ for (const JoinDataType& data : data_vec2) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ // Should be allocated to the second (new) block because the posting list
+ // should grow beyond the size that the first block maintains.
+ EXPECT_THAT(result2.id.block_index(), Eq(2));
+ EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+
+ // The posting list at result2.id should hold all of the data that have been
+ // added.
+ std::vector<JoinDataType> all_data_vec;
+ all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+ all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+ all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(all_data_vec.rbegin(),
+ all_data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, MultiBlockChainsBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(JoinDataType)
+ // is 14, so the max size posting list can store (4096 - 12) / 14 = 291 data.
+ // Adding 292 data should cause:
+ // - 2 max size posting lists being allocated to block 1 and block 2.
+ // - Chaining: block 2 -> block 1
+ std::vector<JoinDataType> data_vec = CreateData(
+ /*num_data=*/292, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ PostingListIdentifier second_block_id = result1.id;
+ // Should be allocated to the second block.
+ EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // We should be able to retrieve all data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_block_id));
+ // This pl_holder will only hold a posting list with the data that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<JoinDataType> second_block_data,
+ serializer_->GetData(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_data, SizeIs(Lt(data_vec.size())));
+ auto first_block_data_start = data_vec.rbegin() + second_block_data.size();
+ EXPECT_THAT(second_block_data,
+ ElementsAreArray(data_vec.rbegin(), first_block_data_start));
+
+ // Now retrieve all of the data that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(
+ serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_data_start, data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ PreexistingMultiBlockReusesBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(JoinDataType)
+ // is 14, so the max size posting list can store (4096 - 12) / 14 = 291 data.
+ // Adding 292 data will cause:
+ // - 2 max size posting lists being allocated to block 1 and block 2.
+ // - Chaining: block 2 -> block 1
+ std::vector<JoinDataType> data_vec1 = CreateData(
+ /*num_data=*/292, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec1) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ PostingListIdentifier first_add_id = result1.id;
+ EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // Now add more data. These should fit on the existing second block and not
+ // fill it up.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), first_add_id));
+ std::vector<JoinDataType> data_vec2 = CreateData(
+ /*num_data=*/10, /*start_document_id=*/data_vec1.back().document_id() + 1,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec2) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ PostingListIdentifier second_add_id = result2.id;
+ EXPECT_THAT(second_add_id, Eq(first_add_id));
+
+ // We should be able to retrieve all data.
+ std::vector<JoinDataType> all_data_vec;
+ all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+ all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+ all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_add_id));
+ // This pl_holder will only hold a posting list with the data that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<JoinDataType> second_block_data,
+ serializer_->GetData(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_data, SizeIs(Lt(all_data_vec.size())));
+ auto first_block_data_start =
+ all_data_vec.rbegin() + second_block_data.size();
+ EXPECT_THAT(second_block_data,
+ ElementsAreArray(all_data_vec.rbegin(), first_block_data_start));
+
+ // Now retrieve all of the data that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_data_start,
+ all_data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ InvalidDataShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ JoinDataType invalid_data = JoinDataType::GetInvalid();
+ EXPECT_THAT(pl_accessor->PrependData(invalid_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ JoinDataNonIncreasingShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ JoinDataType data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/819));
+ ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+
+ JoinDataType data2(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/818));
+ EXPECT_THAT(pl_accessor->PrependData(data2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ JoinDataType data3(/*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId - 1,
+ /*fingerprint=*/820));
+ EXPECT_THAT(pl_accessor->PrependData(data3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ JoinDataType data4(/*document_id=*/0,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId + 1,
+ /*fingerprint=*/820));
+ EXPECT_THAT(pl_accessor->PrependData(data4),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ NewPostingListNoDataAddedShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result.status,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ PreexistingPostingListNoDataAddedShouldSucceed) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor1,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ JoinDataType data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/819));
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor2,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor2).Finalize();
+ EXPECT_THAT(result2.status, IsOk());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/posting-list-join-data-serializer.h b/icing/join/posting-list-join-data-serializer.h
new file mode 100644
index 0000000..9f39dca
--- /dev/null
+++ b/icing/join/posting-list-join-data-serializer.h
@@ -0,0 +1,803 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
+#define ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// A serializer class to serialize JoinDataType to PostingListUsed. Usually
+// JoinDataType is DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>,
+// DocumentIdToJoinInfo<TermId>, or DocumentIdToJoinInfo<int64_t>.
+//
+// REQUIRES:
+// - JoinDataType is comparable by operator <.
+// - JoinDataType implements is_valid() method.
+// - JoinDataType has static method GetInvalid() that returns a JoinDataType
+// instance containing invalid data.
+template <typename JoinDataType>
+class PostingListJoinDataSerializer : public PostingListSerializer {
+ public:
+ using SpecialDataType = SpecialData<JoinDataType>;
+ static_assert(sizeof(SpecialDataType) == sizeof(JoinDataType), "");
+
+ static constexpr uint32_t kSpecialDataSize =
+ kNumSpecialData * sizeof(SpecialDataType);
+
+ uint32_t GetDataTypeBytes() const override { return sizeof(JoinDataType); }
+
+ uint32_t GetMinPostingListSize() const override {
+ static constexpr uint32_t kMinPostingListSize = kSpecialDataSize;
+ static_assert(sizeof(PostingListIndex) <= kMinPostingListSize,
+ "PostingListIndex must be small enough to fit in a "
+ "minimum-sized Posting List.");
+
+ return kMinPostingListSize;
+ }
+
+ uint32_t GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const override;
+
+ uint32_t GetBytesUsed(
+ const PostingListUsed* posting_list_used) const override;
+
+ void Clear(PostingListUsed* posting_list_used) const override;
+
+ libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
+ PostingListUsed* src) const override;
+
+ // Prepend a JoinData to the posting list.
+ //
+ // RETURNS:
+ // - INVALID_ARGUMENT if !data.is_valid() or if data is not greater than the
+ // previously added data.
+ // - RESOURCE_EXHAUSTED if there is no more room to add data to the posting
+ // list.
+ libtextclassifier3::Status PrependData(PostingListUsed* posting_list_used,
+ const JoinDataType& data) const;
+
+ // Prepend multiple JoinData to the posting list.
+ // Data should be sorted in ascending order (as defined by the less than
+ // operator for JoinData)
+ // If keep_prepended is true, whatever could be prepended is kept, otherwise
+ // the posting list is reverted and left in its original state.
+ //
+ // RETURNS:
+ // The number of data that have been prepended to the posting list. If
+ // keep_prepended is false and reverted, then it returns 0.
+ libtextclassifier3::StatusOr<uint32_t> PrependDataArray(
+ PostingListUsed* posting_list_used, const JoinDataType* array,
+ uint32_t num_data, bool keep_prepended) const;
+
+ // Retrieves all data stored in the posting list.
+ //
+ // RETURNS:
+ // - On success, a vector of JoinDataType sorted by the reverse order of
+ // prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetData(
+ const PostingListUsed* posting_list_used) const;
+
+ // Same as GetData but appends data to data_arr_out.
+ //
+ // RETURNS:
+ // - OK on success, and data_arr_out will be appended JoinDataType sorted by
+ // the reverse order of prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetData(
+ const PostingListUsed* posting_list_used,
+ std::vector<JoinDataType>* data_arr_out) const;
+
+ // Undo the last num_data data prepended. If num_data > number of data, then
+ // we clear all data.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status PopFrontData(PostingListUsed* posting_list_used,
+ uint32_t num_data) const;
+
+ // Helper function to determine if posting list is full.
+ bool IsFull(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ private:
+ // In PostingListJoinDataSerializer, there is no compression, but we still use
+ // the traditional posting list implementation.
+ //
+ // Posting list layout formats:
+ //
+ // NOT_FULL
+ // +-special-data-0--+-special-data-1--+------------+-----------------------+
+ // | | | | |
+ // |data-start-offset| Data::Invalid | 0x00000000 | (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+------------+-----------------------+
+ //
+ // ALMOST_FULL
+ // +-special-data-0--+-special-data-1--+-----+------------------------------+
+ // | | | | |
+ // | Data::Invalid | 1st data |(pad)| (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+-----+------------------------------+
+ //
+ // FULL
+ // +-special-data-0--+-special-data-1--+-----+------------------------------+
+ // | | | | |
+ // | 1st data | 2nd data |(pad)| (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+-----+------------------------------+
+ //
+ // The first two uncompressed (special) data also implicitly encode
+ // information about the size of the compressed data region.
+ //
+ // 1. If the posting list is NOT_FULL, then special_data_0 contains the byte
+ // offset of the start of the compressed data. Thus, the size of the
+ // compressed data is
+ // posting_list_used->size_in_bytes() - special_data_0.data_start_offset().
+ //
+ // 2. If posting list is ALMOST_FULL or FULL, then the compressed data region
+ // starts somewhere between
+ // [kSpecialDataSize, kSpecialDataSize + sizeof(JoinDataType) - 1] and ends
+ // at posting_list_used->size_in_bytes() - 1.
+ //
+ // EXAMPLE
+ // JoinDataType = DocumentIdToJoinInfo<int64_t>. Posting list size: 48 bytes
+ //
+ // EMPTY!
+ // +-- byte 0-11 --+---- 12-23 ----+------------ 24-47 -------------+
+ // | | | |
+ // | 48 | Data::Invalid | 0x00000000 |
+ // | | | |
+ // +---------------+---------------+--------------------------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 12, JoinInteger = 5)
+ // NOT FULL!
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | | | | 12 |
+ // | 36 | Data::Invalid | 0x00000000 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 18, JoinInteger = -2)
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | | | 18 | 12 |
+ // | 24 | Data::Invalid | -2 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 22, JoinInteger = 3)
+ // ALMOST_FULL!
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | | 22 | 18 | 12 |
+ // | Data::Invalid | 3 | -2 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 27, JoinInteger = 0)
+ // FULL!
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | 27 | 22 | 18 | 12 |
+ // | 0 | 3 | -2 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+
+ // Helpers to determine what state the posting list is in.
+ bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
+ return !GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ bool IsEmpty(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset() ==
+ posting_list_used->size_in_bytes() &&
+ !GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ // Returns false if both special data are invalid or if data start offset
+ // stored in the special data is less than kSpecialDataSize or greater than
+ // posting_list_used->size_in_bytes(). Returns true, otherwise.
+ bool IsPostingListValid(const PostingListUsed* posting_list_used) const;
+
+ // Prepend data to a posting list that is in the ALMOST_FULL state.
+ //
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if data is not less than the previously added data.
+ libtextclassifier3::Status PrependDataToAlmostFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const;
+
+ // Prepend data to a posting list that is in the EMPTY state. This will always
+ // succeed because there are no pre-existing data and no validly constructed
+ // posting list could fail to fit one data.
+ void PrependDataToEmpty(PostingListUsed* posting_list_used,
+ const JoinDataType& data) const;
+
+ // Prepend data to a posting list that is in the NOT_FULL state.
+ //
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if data is not less than the previously added data.
+ libtextclassifier3::Status PrependDataToNotFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const;
+
+ // Returns either 0 (FULL state), sizeof(JoinDataType) (ALMOST_FULL state) or
+ // a byte offset between kSpecialDataSize and
+ // posting_list_used->size_in_bytes() (inclusive) (NOT_FULL state).
+ uint32_t GetStartByteOffset(const PostingListUsed* posting_list_used) const;
+
+ // Sets special data 0 to properly reflect what start byte offset is (see
+ // layout comment for further details).
+ //
+ // Returns false if offset > posting_list_used->size_in_bytes() or offset is
+ // in range (kSpecialDataSize, sizeof(JoinDataType)) or
+ // (sizeof(JoinDataType), 0). True, otherwise.
+ bool SetStartByteOffset(PostingListUsed* posting_list_used,
+ uint32_t offset) const;
+
+ // Helper for MoveFrom/GetData/PopFrontData. Adds limit number of data to out
+ // or all data in the posting list if the posting list contains less than
+ // limit number of data. out can be NULL.
+ //
+ // NOTE: If called with limit=1, pop=true on a posting list that transitioned
+ // from NOT_FULL directly to FULL, GetDataInternal will not return the posting
+ // list to NOT_FULL. Instead it will leave it in a valid state, but it will be
+ // ALMOST_FULL.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetDataInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<JoinDataType>* out) const;
+
+ // Retrieves the value stored in the index-th special data.
+ //
+ // REQUIRES:
+ // 0 <= index < kNumSpecialData.
+ //
+ // RETURNS:
+ // - A valid SpecialData<JoinDataType>.
+ SpecialDataType GetSpecialData(const PostingListUsed* posting_list_used,
+ uint32_t index) const;
+
+ // Sets the value stored in the index-th special data to special_data.
+ //
+ // REQUIRES:
+ // 0 <= index < kNumSpecialData.
+ void SetSpecialData(PostingListUsed* posting_list_used, uint32_t index,
+ const SpecialDataType& special_data) const;
+
+ // Prepends data to the memory region
+ // [offset - sizeof(JoinDataType), offset - 1] and
+ // returns the new beginning of the region.
+ //
+ // RETURNS:
+ // - The new beginning of the padded region, if successful.
+ // - INVALID_ARGUMENT if data will not fit (uncompressed) between
+ // [kSpecialDataSize, offset - 1]
+ libtextclassifier3::StatusOr<uint32_t> PrependDataUncompressed(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const;
+};
+
+template <typename JoinDataType>
+uint32_t PostingListJoinDataSerializer<JoinDataType>::GetBytesUsed(
+ const PostingListUsed* posting_list_used) const {
+ // The special data will be included if they represent actual data. If they
+ // represent the data start offset or the invalid data sentinel, they are not
+ // included.
+ return posting_list_used->size_in_bytes() -
+ GetStartByteOffset(posting_list_used);
+}
+
+template <typename JoinDataType>
+uint32_t
+PostingListJoinDataSerializer<JoinDataType>::GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
+ // If in either the FULL state or ALMOST_FULL state, this posting list *is*
+ // the minimum size posting list that can fit these data. So just return the
+ // size of the posting list.
+ return posting_list_used->size_in_bytes();
+ }
+
+ // In NOT_FULL state, BytesUsed contains no special data. The minimum sized
+ // posting list that would be guaranteed to fit these data would be
+ // ALMOST_FULL, with kInvalidData in special data 0, the uncompressed data in
+ // special data 1 and the n compressed data in the compressed region.
+ // BytesUsed contains one uncompressed data and n compressed data. Therefore,
+ // fitting these data into a posting list would require BytesUsed plus one
+ // extra data.
+ return GetBytesUsed(posting_list_used) + GetDataTypeBytes();
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::Clear(
+ PostingListUsed* posting_list_used) const {
+ // Safe to ignore return value because posting_list_used->size_in_bytes() is
+ // a valid argument.
+ SetStartByteOffset(posting_list_used,
+ /*offset=*/posting_list_used->size_in_bytes());
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::MoveFrom(
+ PostingListUsed* dst, PostingListUsed* src) const {
+ ICING_RETURN_ERROR_IF_NULL(dst);
+ ICING_RETURN_ERROR_IF_NULL(src);
+ if (GetMinPostingListSizeToFit(src) > dst->size_in_bytes()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "src MinPostingListSizeToFit %d must be larger than size %d.",
+ GetMinPostingListSizeToFit(src), dst->size_in_bytes()));
+ }
+
+ if (!IsPostingListValid(dst)) {
+ return absl_ports::FailedPreconditionError(
+ "Dst posting list is in an invalid state and can't be used!");
+ }
+ if (!IsPostingListValid(src)) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot MoveFrom an invalid src posting list!");
+ }
+
+ // Pop just enough data that all of src's compressed data fit in
+ // dst posting_list's compressed area. Then we can memcpy that area.
+ std::vector<JoinDataType> data_arr;
+ while (IsFull(src) || IsAlmostFull(src) ||
+ (dst->size_in_bytes() - kSpecialDataSize < GetBytesUsed(src))) {
+ if (!GetDataInternal(src, /*limit=*/1, /*pop=*/true, &data_arr).ok()) {
+ return absl_ports::AbortedError(
+ "Unable to retrieve data from src posting list.");
+ }
+ }
+
+ // memcpy the area and set up start byte offset.
+ Clear(dst);
+ memcpy(dst->posting_list_buffer() + dst->size_in_bytes() - GetBytesUsed(src),
+ src->posting_list_buffer() + GetStartByteOffset(src),
+ GetBytesUsed(src));
+ // Because we popped all data from src outside of the compressed area and we
+ // guaranteed that GetBytesUsed(src) is less than dst->size_in_bytes() -
+ // kSpecialDataSize. This is guaranteed to be a valid byte offset for the
+ // NOT_FULL state, so ignoring the value is safe.
+ SetStartByteOffset(dst, dst->size_in_bytes() - GetBytesUsed(src));
+
+ // Put back remaining data.
+ for (auto riter = data_arr.rbegin(); riter != data_arr.rend(); ++riter) {
+ // PrependData may return:
+ // - INVALID_ARGUMENT: if data is invalid or not less than the previous data
+ // - RESOURCE_EXHAUSTED
+ // RESOURCE_EXHAUSTED should be impossible because we've already assured
+ // that there is enough room above.
+ ICING_RETURN_IF_ERROR(PrependData(dst, *riter));
+ }
+
+ Clear(src);
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependDataToAlmostFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const {
+ SpecialDataType special_data = GetSpecialData(posting_list_used, /*index=*/1);
+ if (data < special_data.data()) {
+ return absl_ports::InvalidArgumentError(
+ "JoinData being prepended must not be smaller than the most recent "
+ "JoinData");
+ }
+
+ // Without compression, prepend a new data into ALMOST_FULL posting list will
+ // change the posting list to FULL state. Therefore, set special data 0
+ // directly.
+ SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(data));
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::PrependDataToEmpty(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const {
+ // First data to be added. Just add verbatim, no compression.
+ if (posting_list_used->size_in_bytes() == kSpecialDataSize) {
+ // First data will be stored at special data 1.
+ // Safe to ignore the return value because 1 < kNumSpecialData
+ SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+ // Safe to ignore the return value because sizeof(JoinDataType) is a valid
+ // argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(JoinDataType));
+ } else {
+ // Since this is the first data, size != kSpecialDataSize and
+ // size % sizeof(JoinDataType) == 0, we know that there is room to fit
+ // 'data' into the compressed region, so ValueOrDie is safe.
+ uint32_t offset =
+ PrependDataUncompressed(posting_list_used, data,
+ /*offset=*/posting_list_used->size_in_bytes())
+ .ValueOrDie();
+ // Safe to ignore the return value because PrependDataUncompressed is
+ // guaranteed to return a valid offset.
+ SetStartByteOffset(posting_list_used, offset);
+ }
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependDataToNotFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const {
+ JoinDataType curr = JoinDataType::GetInvalid();
+ memcpy(&curr, posting_list_used->posting_list_buffer() + offset,
+ sizeof(JoinDataType));
+ if (data < curr) {
+ return absl_ports::InvalidArgumentError(
+ "JoinData being prepended must not be smaller than the most recent "
+ "JoinData");
+ }
+
+ if (offset >= kSpecialDataSize + sizeof(JoinDataType)) {
+ offset =
+ PrependDataUncompressed(posting_list_used, data, offset).ValueOrDie();
+ SetStartByteOffset(posting_list_used, offset);
+ } else {
+ // The new data must be put in special data 1.
+ SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+ // State ALMOST_FULL. Safe to ignore the return value because
+ // sizeof(JoinDataType) is a valid argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(JoinDataType));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependData(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const {
+ if (!data.is_valid()) {
+ return absl_ports::InvalidArgumentError("Cannot prepend an invalid data!");
+ }
+ if (!IsPostingListValid(posting_list_used)) {
+ return absl_ports::FailedPreconditionError(
+ "This PostingListUsed is in an invalid state and can't add any data!");
+ }
+
+ if (IsFull(posting_list_used)) {
+ // State FULL: no space left.
+ return absl_ports::ResourceExhaustedError("No more room for data");
+ } else if (IsAlmostFull(posting_list_used)) {
+ return PrependDataToAlmostFull(posting_list_used, data);
+ } else if (IsEmpty(posting_list_used)) {
+ PrependDataToEmpty(posting_list_used, data);
+ return libtextclassifier3::Status::OK;
+ } else {
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ return PrependDataToNotFull(posting_list_used, data, offset);
+ }
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListJoinDataSerializer<JoinDataType>::PrependDataArray(
+ PostingListUsed* posting_list_used, const JoinDataType* array,
+ uint32_t num_data, bool keep_prepended) const {
+ if (!IsPostingListValid(posting_list_used)) {
+ return 0;
+ }
+
+ uint32_t i;
+ for (i = 0; i < num_data; ++i) {
+ if (!PrependData(posting_list_used, array[i]).ok()) {
+ break;
+ }
+ }
+ if (i != num_data && !keep_prepended) {
+ // Didn't fit. Undo everything and check that we have the same offset as
+ // before. PopFrontData guarantees that it will remove all 'i' data so long
+ // as there are at least 'i' data in the posting list, which we know there
+ // are.
+ ICING_RETURN_IF_ERROR(PopFrontData(posting_list_used, /*num_data=*/i));
+ return 0;
+ }
+ return i;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<std::vector<JoinDataType>>
+PostingListJoinDataSerializer<JoinDataType>::GetData(
+ const PostingListUsed* posting_list_used) const {
+ std::vector<JoinDataType> data_arr_out;
+ ICING_RETURN_IF_ERROR(GetData(posting_list_used, &data_arr_out));
+ return data_arr_out;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status PostingListJoinDataSerializer<JoinDataType>::GetData(
+ const PostingListUsed* posting_list_used,
+ std::vector<JoinDataType>* data_arr_out) const {
+ return GetDataInternal(posting_list_used,
+ /*limit=*/std::numeric_limits<uint32_t>::max(),
+ /*pop=*/false, data_arr_out);
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PopFrontData(
+ PostingListUsed* posting_list_used, uint32_t num_data) const {
+ if (num_data == 1 && IsFull(posting_list_used)) {
+ // The PL is in FULL state which means that we save 2 uncompressed data in
+ // the 2 special postions. But FULL state may be reached by 2 different
+ // states.
+ // (1) In ALMOST_FULL state
+ // +------------------+-----------------+-----+---------------------------+
+ // |Data::Invalid |1st data |(pad)|(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+-----+---------------------------+
+ // When we prepend another data, we can only put it at special data 0, and
+ // thus get a FULL PL
+ // +------------------+-----------------+-----+---------------------------+
+ // |new 1st data |original 1st data|(pad)|(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+-----+---------------------------+
+ //
+ // (2) In NOT_FULL state
+ // +------------------+-----------------+-------+---------+---------------+
+ // |data-start-offset |Data::Invalid |(pad) |1st data |(compressed) |
+ // | | | | |data |
+ // +------------------+-----------------+-------+---------+---------------+
+ // When we prepend another data, we can reach any of the 3 following
+ // scenarios:
+ // (2.1) NOT_FULL
+ // if the space of pad and original 1st data can accommodate the new 1st
+ // data and the encoded delta value.
+ // +------------------+-----------------+-----+--------+------------------+
+ // |data-start-offset |Data::Invalid |(pad)|new |(compressed) data |
+ // | | | |1st data| |
+ // +------------------+-----------------+-----+--------+------------------+
+ // (2.2) ALMOST_FULL
+ // If the space of pad and original 1st data cannot accommodate the new 1st
+ // data and the encoded delta value but can accommodate the encoded delta
+ // value only. We can put the new 1st data at special position 1.
+ // +------------------+-----------------+---------+-----------------------+
+ // |Data::Invalid |new 1st data |(pad) |(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+---------+-----------------------+
+ // (2.3) FULL
+ // In very rare case, it cannot even accommodate only the encoded delta
+ // value. we can move the original 1st data into special position 1 and the
+ // new 1st data into special position 0. This may happen because we use
+ // VarInt encoding method which may make the encoded value longer (about
+ // 4/3 times of original)
+ // +------------------+-----------------+--------------+------------------+
+ // |new 1st data |original 1st data|(pad) |(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+--------------+------------------+
+ //
+ // Suppose now the PL is in FULL state. But we don't know whether it arrived
+ // this state from NOT_FULL (like (2.3)) or from ALMOST_FULL (like (1)).
+ // We'll return to ALMOST_FULL state like (1) if we simply pop the new 1st
+ // data, but we want to make the prepending operation "reversible". So
+ // there should be some way to return to NOT_FULL if possible. A simple way
+ // to do is:
+ // - Pop 2 data out of the PL to state ALMOST_FULL or NOT_FULL.
+ // - Add the second data ("original 1st data") back.
+ //
+ // Then we can return to the correct original states of (2.1) or (1). This
+ // makes our prepending operation reversible.
+ std::vector<JoinDataType> out;
+
+ // Popping 2 data should never fail because we've just ensured that the
+ // posting list is in the FULL state.
+ ICING_RETURN_IF_ERROR(
+ GetDataInternal(posting_list_used, /*limit=*/2, /*pop=*/true, &out));
+
+ // PrependData should never fail because:
+ // - out[1] is a valid data less than all previous data in the posting list.
+ // - There's no way that the posting list could run out of room because it
+ // previously stored these 2 data.
+ ICING_RETURN_IF_ERROR(PrependData(posting_list_used, out[1]));
+ } else if (num_data > 0) {
+ return GetDataInternal(posting_list_used, /*limit=*/num_data, /*pop=*/true,
+ /*out=*/nullptr);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::GetDataInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<JoinDataType>* out) const {
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ uint32_t count = 0;
+
+ // First traverse the first two special positions.
+ while (count < limit && offset < kSpecialDataSize) {
+ // offset / sizeof(JoinDataType) < kNumSpecialData
+ // because of the check above.
+ SpecialDataType special_data = GetSpecialData(
+ posting_list_used, /*index=*/offset / sizeof(JoinDataType));
+ if (out != nullptr) {
+ out->push_back(special_data.data());
+ }
+ offset += sizeof(JoinDataType);
+ ++count;
+ }
+
+ // - We don't compress the data.
+ // - The posting list size is a multiple of data type bytes.
+ // So offset of the first non-special data is guaranteed to be at
+ // kSpecialDataSize if in ALMOST_FULL or FULL state. In fact, we must not
+ // apply padding skipping logic here when still storing uncompressed data,
+ // because in this case 0 bytes are meanful (e.g. inverted doc id byte = 0).
+ while (count < limit && offset < posting_list_used->size_in_bytes()) {
+ JoinDataType data = JoinDataType::GetInvalid();
+ memcpy(&data, posting_list_used->posting_list_buffer() + offset,
+ sizeof(JoinDataType));
+ offset += sizeof(JoinDataType);
+ if (out != nullptr) {
+ out->push_back(data);
+ }
+ ++count;
+ }
+
+ if (pop) {
+ PostingListUsed* mutable_posting_list_used =
+ const_cast<PostingListUsed*>(posting_list_used);
+ // Modify the posting list so that we pop all data actually traversed.
+ if (offset >= kSpecialDataSize &&
+ offset < posting_list_used->size_in_bytes()) {
+ memset(
+ mutable_posting_list_used->posting_list_buffer() + kSpecialDataSize,
+ 0, offset - kSpecialDataSize);
+ }
+ SetStartByteOffset(mutable_posting_list_used, offset);
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+typename PostingListJoinDataSerializer<JoinDataType>::SpecialDataType
+PostingListJoinDataSerializer<JoinDataType>::GetSpecialData(
+ const PostingListUsed* posting_list_used, uint32_t index) const {
+ // It is ok to temporarily construct a SpecialData with offset = 0 since we're
+ // going to overwrite it by memcpy.
+ SpecialDataType special_data(0);
+ memcpy(&special_data,
+ posting_list_used->posting_list_buffer() +
+ index * sizeof(SpecialDataType),
+ sizeof(SpecialDataType));
+ return special_data;
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::SetSpecialData(
+ PostingListUsed* posting_list_used, uint32_t index,
+ const SpecialDataType& special_data) const {
+ memcpy(posting_list_used->posting_list_buffer() +
+ index * sizeof(SpecialDataType),
+ &special_data, sizeof(SpecialDataType));
+}
+
+template <typename JoinDataType>
+bool PostingListJoinDataSerializer<JoinDataType>::IsPostingListValid(
+ const PostingListUsed* posting_list_used) const {
+ if (IsAlmostFull(posting_list_used)) {
+ // Special data 1 should hold a valid data.
+ if (!GetSpecialData(posting_list_used, /*index=*/1).data().is_valid()) {
+ ICING_LOG(ERROR)
+ << "Both special data cannot be invalid at the same time.";
+ return false;
+ }
+ } else if (!IsFull(posting_list_used)) {
+ // NOT_FULL. Special data 0 should hold a valid offset.
+ SpecialDataType special_data =
+ GetSpecialData(posting_list_used, /*index=*/0);
+ if (special_data.data_start_offset() > posting_list_used->size_in_bytes() ||
+ special_data.data_start_offset() < kSpecialDataSize) {
+ ICING_LOG(ERROR) << "Offset: " << special_data.data_start_offset()
+ << " size: " << posting_list_used->size_in_bytes()
+ << " sp size: " << kSpecialDataSize;
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename JoinDataType>
+uint32_t PostingListJoinDataSerializer<JoinDataType>::GetStartByteOffset(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used)) {
+ return 0;
+ } else if (IsAlmostFull(posting_list_used)) {
+ return sizeof(JoinDataType);
+ } else {
+ return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset();
+ }
+}
+
+template <typename JoinDataType>
+bool PostingListJoinDataSerializer<JoinDataType>::SetStartByteOffset(
+ PostingListUsed* posting_list_used, uint32_t offset) const {
+ if (offset > posting_list_used->size_in_bytes()) {
+ ICING_LOG(ERROR) << "offset cannot be a value greater than size "
+ << posting_list_used->size_in_bytes() << ". offset is "
+ << offset << ".";
+ return false;
+ }
+ if (offset < kSpecialDataSize && offset > sizeof(JoinDataType)) {
+ ICING_LOG(ERROR) << "offset cannot be a value between ("
+ << sizeof(JoinDataType) << ", " << kSpecialDataSize
+ << "). offset is " << offset << ".";
+ return false;
+ }
+ if (offset < sizeof(JoinDataType) && offset != 0) {
+ ICING_LOG(ERROR) << "offset cannot be a value between (0, "
+ << sizeof(JoinDataType) << "). offset is " << offset
+ << ".";
+ return false;
+ }
+
+ if (offset >= kSpecialDataSize) {
+ // NOT_FULL state.
+ SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(offset));
+ SetSpecialData(posting_list_used, /*index=*/1,
+ SpecialDataType(JoinDataType::GetInvalid()));
+ } else if (offset == sizeof(JoinDataType)) {
+ // ALMOST_FULL state.
+ SetSpecialData(posting_list_used, /*index=*/0,
+ SpecialDataType(JoinDataType::GetInvalid()));
+ }
+ // Nothing to do for the FULL state - the offset isn't actually stored
+ // anywhere and both 2 special data hold valid data.
+ return true;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListJoinDataSerializer<JoinDataType>::PrependDataUncompressed(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const {
+ if (offset < kSpecialDataSize + sizeof(JoinDataType)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Not enough room to prepend JoinData at offset %d.", offset));
+ }
+ offset -= sizeof(JoinDataType);
+ memcpy(posting_list_used->posting_list_buffer() + offset, &data,
+ sizeof(JoinDataType));
+ return offset;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
diff --git a/icing/join/posting-list-join-data-serializer_test.cc b/icing/join/posting-list-join-data-serializer_test.cc
new file mode 100644
index 0000000..20137b6
--- /dev/null
+++ b/icing/join/posting-list-join-data-serializer_test.cc
@@ -0,0 +1,653 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/posting-list-join-data-serializer.h"
+
+#include <algorithm>
+#include <iterator>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/testing/common-matchers.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::Eq;
+using testing::IsEmpty;
+using testing::SizeIs;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitNotNull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size =
+ 2551 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2))),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetMinPostingListSizeToFit(&pl_used),
+ Eq(2 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetMinPostingListSizeToFit(&pl_used),
+ Eq(3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+}
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitAlmostFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2))),
+ IsOk());
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2))),
+ IsOk());
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))),
+ IsOk());
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10))),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataNotFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size =
+ 2551 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // Make used.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data0(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ // Size = sizeof(uncompressed data0)
+ int expected_size =
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ // Size = sizeof(uncompressed data1)
+ // + sizeof(uncompressed data0)
+ expected_size += sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data2(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+ // Size = sizeof(uncompressed data2)
+ // + sizeof(uncompressed data1)
+ // + sizeof(uncompressed data0)
+ expected_size += sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data2, data1, data0)));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataAlmostFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 4 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // Fill up the compressed region.
+ // Transitions:
+ // Adding data0: EMPTY -> NOT_FULL
+ // Adding data1: NOT_FULL -> NOT_FULL
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data0(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ int expected_size =
+ 2 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ // Add one more data to transition NOT_FULL -> ALMOST_FULL
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data2(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+ expected_size =
+ 3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data2, data1, data0)));
+
+ // Add one more data to transition ALMOST_FULL -> FULL
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data3(
+ /*document_id=*/3, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/0));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data3), IsOk());
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data3, data2, data1, data0)));
+
+ // The posting list is FULL. Adding another data should fail.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data4(
+ /*document_id=*/4, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/1234));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data4),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependSmallerDataShouldFail) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 4 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data(
+ /*document_id=*/100,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> smaller_data(
+ /*document_id=*/99,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+
+ // NOT_FULL -> NOT_FULL
+ ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // NOT_FULL -> ALMOST_FULL
+ ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // ALMOST_FULL -> FULL
+ ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataPostingListUsedMinSize) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // PL State: EMPTY
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(IsEmpty()));
+
+ // Add a data. PL should shift to ALMOST_FULL state
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data0(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ // Size = sizeof(uncompressed data0)
+ int expected_size =
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+ // Add another data. PL should shift to FULL state.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ // Size = sizeof(uncompressed data1) + sizeof(uncompressed data0)
+ expected_size += sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ // The posting list is FULL. Adding another data should fail.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data2(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataArrayDoNotKeepPrepended) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 6 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_in;
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_pushed;
+
+ // Add 3 data. The PL is in the empty state and should be able to fit all 3
+ // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 2 data. The PL should transition from NOT_FULL to ALMOST_FULL.
+ data_in.clear();
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4, NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 2 data. The PL should remain ALMOST_FULL since the remaining space can
+ // only fit 1 data.
+ data_in.clear();
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, NamespaceFingerprintIdentifier(/*namespace_id=*/2,
+ /*fingerprint=*/99)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/63)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(0));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 1 data. The PL should transition from ALMOST_FULL to FULL.
+ data_in.pop_back();
+ ASSERT_THAT(data_in, SizeIs(1));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataArrayKeepPrepended) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 6 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_in;
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_pushed;
+
+ // Add 3 data. The PL is in the empty state and should be able to fit all 3
+ // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/true),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 4 data. The PL should prepend 3 data and transition from NOT_FULL to
+ // FULL.
+ data_in.clear();
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4, NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, NamespaceFingerprintIdentifier(/*namespace_id=*/2,
+ /*fingerprint=*/99)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/63)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/true),
+ IsOkAndHolds(3));
+ data_in.pop_back();
+ ASSERT_THAT(data_in, SizeIs(3));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveFrom) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr1 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr1.size()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr2 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/0)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/2,
+ /*fingerprint=*/99))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr2.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetData(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+ EXPECT_THAT(serializer.GetData(&pl_used1), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveToNullReturnsFailedPrecondition) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr = {
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used, /*src=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/nullptr, /*src=*/&pl_used),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveToPostingListTooSmall) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size1 = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size1));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr1 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/0)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr1.size()));
+
+ int size2 = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size2));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr2 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/99))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr2.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used1),
+ IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(data_arr2.rbegin(), data_arr2.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, PopFrontData) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 2 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr = {
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr.size()));
+ ASSERT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+ // Now, pop the last data. The posting list should contain the first three
+ // data.
+ EXPECT_THAT(serializer.PopFrontData(&pl_used, /*num_data=*/1), IsOk());
+ data_arr.pop_back();
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index.cc b/icing/join/qualified-id-join-index-impl-v1.cc
index 07b5627..cdcb5a9 100644
--- a/icing/join/qualified-id-join-index.cc
+++ b/icing/join/qualified-id-join-index-impl-v1.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
#include <cstring>
#include <memory>
@@ -29,9 +29,11 @@
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
#include "icing/store/document-id.h"
#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-id.h"
#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/util/crc32.h"
#include "icing/util/encode-util.h"
@@ -74,17 +76,20 @@ std::string GetQualifiedIdStoragePath(std::string_view working_path) {
} // namespace
-/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-QualifiedIdJoinIndex::Create(const Filesystem& filesystem,
- std::string working_path, bool pre_mapping_fbv,
- bool use_persistent_hash_map) {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::Create(const Filesystem& filesystem,
+ std::string working_path,
+ bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
!filesystem.DirectoryExists(
GetDocJoinInfoMapperPath(working_path).c_str()) ||
!filesystem.FileExists(GetQualifiedIdStoragePath(working_path).c_str())) {
// Discard working_path if any file/directory is missing, and reinitialize.
if (filesystem.DirectoryExists(working_path.c_str())) {
- ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem, working_path));
}
return InitializeNewFiles(filesystem, std::move(working_path),
pre_mapping_fbv, use_persistent_hash_map);
@@ -93,7 +98,7 @@ QualifiedIdJoinIndex::Create(const Filesystem& filesystem,
pre_mapping_fbv, use_persistent_hash_map);
}
-QualifiedIdJoinIndex::~QualifiedIdJoinIndex() {
+QualifiedIdJoinIndexImplV1::~QualifiedIdJoinIndexImplV1() {
if (!PersistToDisk().ok()) {
ICING_LOG(WARNING) << "Failed to persist qualified id type joinable index "
"to disk while destructing "
@@ -101,7 +106,7 @@ QualifiedIdJoinIndex::~QualifiedIdJoinIndex() {
}
}
-libtextclassifier3::Status QualifiedIdJoinIndex::Put(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Put(
const DocJoinInfo& doc_join_info, std::string_view ref_qualified_id_str) {
SetDirty();
@@ -128,7 +133,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Put(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndex::Get(
+libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndexImplV1::Get(
const DocJoinInfo& doc_join_info) const {
if (!doc_join_info.is_valid()) {
return absl_ports::InvalidArgumentError(
@@ -144,11 +149,13 @@ libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndex::Get(
return std::string_view(data, strlen(data));
}
-libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
DocumentId new_last_added_document_id) {
std::string temp_working_path = working_path_ + "_temp";
- ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem_, temp_working_path));
DestructibleDirectory temp_working_path_ddir(&filesystem_,
std::move(temp_working_path));
@@ -162,9 +169,10 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
// Transfer all data from the current to new qualified id type joinable
// index. Also PersistToDisk and destruct the instance after finishing, so
// we can safely swap directories later.
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndex> new_index,
- Create(filesystem_, temp_working_path_ddir.dir(),
- pre_mapping_fbv_, use_persistent_hash_map_));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> new_index,
+ Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_,
+ use_persistent_hash_map_));
ICING_RETURN_IF_ERROR(
TransferIndex(document_id_old_to_new, new_index.get()));
new_index->set_last_added_document_id(new_last_added_document_id);
@@ -216,7 +224,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status QualifiedIdJoinIndex::Clear() {
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Clear() {
SetDirty();
doc_join_info_mapper_.reset();
@@ -252,11 +260,12 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Clear() {
return libtextclassifier3::Status::OK;
}
-/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
- std::string&& working_path,
- bool pre_mapping_fbv,
- bool use_persistent_hash_map) {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
// Create working directory.
if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
return absl_ports::InternalError(
@@ -291,8 +300,8 @@ QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
/*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
// Create instance.
- auto new_index =
- std::unique_ptr<QualifiedIdJoinIndex>(new QualifiedIdJoinIndex(
+ auto new_index = std::unique_ptr<QualifiedIdJoinIndexImplV1>(
+ new QualifiedIdJoinIndexImplV1(
filesystem, std::move(working_path),
/*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
std::move(doc_join_info_mapper), std::move(qualified_id_storage),
@@ -307,11 +316,11 @@ QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
return new_index;
}
-/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
- std::string&& working_path,
- bool pre_mapping_fbv,
- bool use_persistent_hash_map) {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map) {
// PRead metadata file.
auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
@@ -358,8 +367,8 @@ QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
/*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
// Create instance.
- auto type_joinable_index =
- std::unique_ptr<QualifiedIdJoinIndex>(new QualifiedIdJoinIndex(
+ auto type_joinable_index = std::unique_ptr<QualifiedIdJoinIndexImplV1>(
+ new QualifiedIdJoinIndexImplV1(
filesystem, std::move(working_path), std::move(metadata_buffer),
std::move(doc_join_info_mapper), std::move(qualified_id_storage),
pre_mapping_fbv, use_persistent_hash_map));
@@ -374,9 +383,9 @@ QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
return type_joinable_index;
}
-libtextclassifier3::Status QualifiedIdJoinIndex::TransferIndex(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::TransferIndex(
const std::vector<DocumentId>& document_id_old_to_new,
- QualifiedIdJoinIndex* new_index) const {
+ QualifiedIdJoinIndexImplV1* new_index) const {
std::unique_ptr<KeyMapper<int32_t>::Iterator> iter =
doc_join_info_mapper_->GetIterator();
while (iter->Advance()) {
@@ -404,7 +413,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::TransferIndex(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::PersistMetadataToDisk(
bool force) {
if (!force && !is_info_dirty() && !is_storage_dirty()) {
return libtextclassifier3::Status::OK;
@@ -429,7 +438,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::PersistStoragesToDisk(
bool force) {
if (!force && !is_storage_dirty()) {
return libtextclassifier3::Status::OK;
@@ -440,8 +449,8 @@ libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<Crc32> QualifiedIdJoinIndex::ComputeInfoChecksum(
- bool force) {
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV1::ComputeInfoChecksum(bool force) {
if (!force && !is_info_dirty()) {
return Crc32(crcs().component_crcs.info_crc);
}
@@ -450,7 +459,7 @@ libtextclassifier3::StatusOr<Crc32> QualifiedIdJoinIndex::ComputeInfoChecksum(
}
libtextclassifier3::StatusOr<Crc32>
-QualifiedIdJoinIndex::ComputeStoragesChecksum(bool force) {
+QualifiedIdJoinIndexImplV1::ComputeStoragesChecksum(bool force) {
if (!force && !is_storage_dirty()) {
return Crc32(crcs().component_crcs.storages_crc);
}
diff --git a/icing/join/qualified-id-join-index-impl-v1.h b/icing/join/qualified-id-join-index-impl-v1.h
new file mode 100644
index 0000000..9314602
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v1.h
@@ -0,0 +1,327 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndexImplV1: a class to maintain data mapping DocJoinInfo to
+// joinable qualified ids and delete propagation info.
+class QualifiedIdJoinIndexImplV1 : public QualifiedIdJoinIndex {
+ public:
+ struct Info {
+ static constexpr int32_t kMagic = 0x48cabdc6;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 8, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+ static constexpr int32_t kInfoMetadataBufferOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
+
+ // Creates a QualifiedIdJoinIndexImplV1 instance to store qualified ids for
+  // future joining search. If any of the underlying files is missing, then
+ // delete the whole working_path and (re)initialize with new ones. Otherwise
+ // initialize and create the instance by existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // QualifiedIdJoinIndexImplV1 uses working path as working
+ // directory and all related files will be stored under this
+  //               directory. It takes full ownership of working_path_,
+ // including creation/deletion. It is the caller's
+ // responsibility to specify correct working path and avoid
+ // mixing different persistent storages together under the same
+ // path. Also the caller has the ownership for the parent
+ // directory of working_path_, and it is responsible for parent
+ // directory creation/deletion. See PersistentStorage for more
+ // details about the concept of working_path.
+  // pre_mapping_fbv: flag indicating whether to memory map the max possible
+  //                  file size for the underlying FileBackedVector before
+  //                  growing the actual file size.
+  // use_persistent_hash_map: flag indicating whether to use persistent hash
+  //                          map as the key mapper (if false, then fall back
+  //                          to dynamic trie key mapper).
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum
+ // - INTERNAL_ERROR on I/O errors
+ // - Any KeyMapper errors
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+ // Delete copy and move constructor/assignment operator.
+ QualifiedIdJoinIndexImplV1(const QualifiedIdJoinIndexImplV1&) = delete;
+ QualifiedIdJoinIndexImplV1& operator=(const QualifiedIdJoinIndexImplV1&) =
+ delete;
+
+ QualifiedIdJoinIndexImplV1(QualifiedIdJoinIndexImplV1&&) = delete;
+ QualifiedIdJoinIndexImplV1& operator=(QualifiedIdJoinIndexImplV1&&) = delete;
+
+ ~QualifiedIdJoinIndexImplV1() override;
+
+ // v2 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) override {
+ return absl_ports::UnimplementedError("This API is not supported in V2");
+ }
+
+ // v2 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+ GetIterator(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const override {
+ return absl_ports::UnimplementedError("This API is not supported in V2");
+ }
+
+ // Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
+ // references to ref_qualified_id_str (the identifier of another document).
+ //
+ // REQUIRES: ref_qualified_id_str contains no '\0'.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - Any KeyMapper errors
+ libtextclassifier3::Status Put(
+ const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str) override;
+
+ // Gets the referenced document's qualified id string by DocJoinInfo.
+ //
+ // Returns:
+ // - A qualified id string referenced by the given DocJoinInfo (DocumentId,
+ // JoinablePropertyId) on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - NOT_FOUND_ERROR if doc_join_info doesn't exist
+ // - Any KeyMapper errors
+ libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const override;
+
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Qualified id type joinable index will convert all entries to the
+ // new document ids.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - namespace_id_old_to_new: a map for converting old namespace id to new
+ // namespace id. It is unused in this implementation since we store raw
+ // qualified id string (which contains raw namespace string).
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the qualified id type joinable
+ // index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+ // an invalid state and the caller should handle it properly (e.g. discard
+ // and rebuild)
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ // Clears all data and set last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear() override;
+
+ bool is_v2() const override { return false; }
+
+ int32_t size() const override { return doc_join_info_mapper_->num_keys(); }
+
+ bool empty() const override { return size() == 0; }
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ private:
+ explicit QualifiedIdJoinIndexImplV1(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<uint8_t[]> metadata_buffer,
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ bool pre_mapping_fbv, bool use_persistent_hash_map)
+ : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
+ metadata_buffer_(std::move(metadata_buffer)),
+ doc_join_info_mapper_(std::move(doc_join_info_mapper)),
+ qualified_id_storage_(std::move(qualified_id_storage)),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ use_persistent_hash_map_(use_persistent_hash_map),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path, bool pre_mapping_fbv,
+ bool use_persistent_hash_map);
+
+ // Transfers qualified id join index data from the current to new_index and
+  // converts to new document ids according to document_id_old_to_new. It is a
+ // helper function for Optimize.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ QualifiedIdJoinIndexImplV1* new_index) const;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ // Metadata buffer
+ std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+ // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
+ // JoinablePropertyId) to another referenced document's qualified id string
+ // index in qualified_id_storage_.
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+
+ // Storage for qualified id strings.
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
+
+ // TODO(b/268521214): add delete propagation storage
+
+  // Flag indicating whether to memory map the max possible file size for the
+  // underlying FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+  // Flag indicating whether to use persistent hash map as the key mapper (if
+ // false, then fall back to dynamic trie key mapper).
+ bool use_persistent_hash_map_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
diff --git a/icing/join/qualified-id-join-index_test.cc b/icing/join/qualified-id-join-index-impl-v1_test.cc
index 3d59f4b..a6e19bb 100644
--- a/icing/join/qualified-id-join-index_test.cc
+++ b/icing/join/qualified-id-join-index-impl-v1_test.cc
@@ -12,8 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
@@ -49,22 +50,22 @@ using ::testing::Pointee;
using ::testing::SizeIs;
using Crcs = PersistentStorage::Crcs;
-using Info = QualifiedIdJoinIndex::Info;
+using Info = QualifiedIdJoinIndexImplV1::Info;
static constexpr int32_t kCorruptedValueOffset = 3;
-struct QualifiedIdJoinIndexTestParam {
+struct QualifiedIdJoinIndexImplV1TestParam {
bool pre_mapping_fbv;
bool use_persistent_hash_map;
- explicit QualifiedIdJoinIndexTestParam(bool pre_mapping_fbv_in,
- bool use_persistent_hash_map_in)
+ explicit QualifiedIdJoinIndexImplV1TestParam(bool pre_mapping_fbv_in,
+ bool use_persistent_hash_map_in)
: pre_mapping_fbv(pre_mapping_fbv_in),
use_persistent_hash_map(use_persistent_hash_map_in) {}
};
-class QualifiedIdJoinIndexTest
- : public ::testing::TestWithParam<QualifiedIdJoinIndexTestParam> {
+class QualifiedIdJoinIndexImplV1Test
+ : public ::testing::TestWithParam<QualifiedIdJoinIndexImplV1TestParam> {
protected:
void SetUp() override {
base_dir_ = GetTestTempDir() + "/icing";
@@ -83,26 +84,26 @@ class QualifiedIdJoinIndexTest
std::string working_path_;
};
-TEST_P(QualifiedIdJoinIndexTest, InvalidWorkingPath) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidWorkingPath) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
- EXPECT_THAT(QualifiedIdJoinIndex::Create(
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(
filesystem_, "/dev/null/qualified_id_join_index_test",
param.pre_mapping_fbv, param.use_persistent_hash_map),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InitializeNewFiles) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index, Pointee(IsEmpty()));
ICING_ASSERT_OK(index->PersistToDisk());
@@ -112,23 +113,25 @@ TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
// sections.
const std::string metadata_file_path =
absl_ports::StrCat(working_path_, "/metadata");
- auto metadata_buffer =
- std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
ASSERT_THAT(
filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize,
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
/*offset=*/0),
IsTrue());
// Check info section
const Info* info = reinterpret_cast<const Info*>(
- metadata_buffer.get() + QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
EXPECT_THAT(info->magic, Eq(Info::kMagic));
EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
// Check crcs section
const Crcs* crcs = reinterpret_cast<const Crcs*>(
- metadata_buffer.get() + QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
// There are some initial info in KeyMapper, so storages_crc should be
// non-zero.
EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
@@ -143,16 +146,16 @@ TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
.Get()));
}
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
InitializationShouldFailWithoutPersistToDiskOrDestruction) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
// Insert some data.
ICING_ASSERT_OK(
@@ -168,23 +171,24 @@ TEST_P(QualifiedIdJoinIndexTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
StatusIs(param.use_persistent_hash_map
? libtextclassifier3::StatusCode::FAILED_PRECONDITION
: libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializationShouldSucceedWithPersistToDisk) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index1,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index1,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
// Insert some data.
ICING_ASSERT_OK(
@@ -204,10 +208,10 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_EXPECT_OK(index1->PersistToDisk());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index2,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index2,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index2, Pointee(SizeIs(3)));
EXPECT_THAT(
index2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
@@ -220,16 +224,17 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriC"));
}
-TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializationShouldSucceedAfterDestruction) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
// Insert some data.
ICING_ASSERT_OK(
@@ -250,10 +255,10 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
// thus initializing another instance on the same files should succeed, and
// we should be able to get the same contents.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index, Pointee(SizeIs(3)));
EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/1,
/*joinable_property_id=*/20)),
@@ -267,17 +272,17 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
}
}
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
InitializeExistingFilesWithDifferentMagicShouldFail) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
/*ref_qualified_id_str=*/"namespace#uriA"));
@@ -292,49 +297,49 @@ TEST_P(QualifiedIdJoinIndexTest,
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
- auto metadata_buffer =
- std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize,
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
/*offset=*/0),
IsTrue());
// Manually change magic and update checksums.
Crcs* crcs = reinterpret_cast<Crcs*>(
metadata_buffer.get() +
- QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+ QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
Info* info = reinterpret_cast<Info*>(
metadata_buffer.get() +
- QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+ QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
info->magic += kCorruptedValueOffset;
crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
- ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
- metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize),
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize),
IsTrue());
}
// Attempt to create the qualified id join index with different magic. This
// should fail.
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
HasSubstr("Incorrect magic value")));
}
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
InitializeExistingFilesWithWrongAllCrcShouldFail) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
/*ref_qualified_id_str=*/"namespace#uriA"));
@@ -348,45 +353,45 @@ TEST_P(QualifiedIdJoinIndexTest,
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
- auto metadata_buffer =
- std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize,
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
/*offset=*/0),
IsTrue());
// Manually corrupt all_crc
Crcs* crcs = reinterpret_cast<Crcs*>(
metadata_buffer.get() +
- QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+ QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
crcs->all_crc += kCorruptedValueOffset;
- ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
- metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize),
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize),
IsTrue());
}
// Attempt to create the qualified id join index with metadata containing
// corrupted all_crc. This should fail.
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
HasSubstr("Invalid all crc")));
}
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
InitializeExistingFilesWithCorruptedInfoShouldFail) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
/*ref_qualified_id_str=*/"namespace#uriA"));
@@ -400,10 +405,10 @@ TEST_P(QualifiedIdJoinIndexTest,
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
- auto metadata_buffer =
- std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize,
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
/*offset=*/0),
IsTrue());
@@ -411,35 +416,35 @@ TEST_P(QualifiedIdJoinIndexTest,
// corruption of info.
Info* info = reinterpret_cast<Info*>(
metadata_buffer.get() +
- QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+ QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
info->last_added_document_id += kCorruptedValueOffset;
- ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
- metadata_buffer.get(),
- QualifiedIdJoinIndex::kMetadataFileSize),
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize),
IsTrue());
}
// Attempt to create the qualified id join index with info that doesn't match
// its checksum. This should fail.
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
HasSubstr("Invalid info crc")));
}
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
InitializeExistingFilesWithCorruptedDocJoinInfoMapperShouldFail) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
/*ref_qualified_id_str=*/"namespace#uriA"));
@@ -472,24 +477,24 @@ TEST_P(QualifiedIdJoinIndexTest,
// Attempt to create the qualified id join index with corrupted
// doc_join_info_mapper. This should fail.
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
HasSubstr("Invalid storages crc")));
}
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
InitializeExistingFilesWithCorruptedQualifiedIdStorageShouldFail) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
/*ref_qualified_id_str=*/"namespace#uriA"));
@@ -518,22 +523,22 @@ TEST_P(QualifiedIdJoinIndexTest,
// Attempt to create the qualified id join index with corrupted
// qualified_id_storage. This should fail.
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
HasSubstr("Invalid storages crc")));
}
-TEST_P(QualifiedIdJoinIndexTest, InvalidPut) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidPut) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
DocJoinInfo default_invalid;
EXPECT_THAT(
@@ -541,23 +546,23 @@ TEST_P(QualifiedIdJoinIndexTest, InvalidPut) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_P(QualifiedIdJoinIndexTest, InvalidGet) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidGet) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
DocJoinInfo default_invalid;
EXPECT_THAT(index->Get(default_invalid),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, PutAndGet) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
std::string_view ref_qualified_id_str_a = "namespace#uriA";
@@ -571,10 +576,10 @@ TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index->Put(target_info1, ref_qualified_id_str_a), IsOk());
EXPECT_THAT(index->Put(target_info2, ref_qualified_id_str_b), IsOk());
@@ -590,28 +595,28 @@ TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
// Verify we can get all of them after destructing and re-initializing.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index, Pointee(SizeIs(3)));
EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
}
-TEST_P(QualifiedIdJoinIndexTest, GetShouldReturnNotFoundErrorIfNotExist) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, GetShouldReturnNotFoundErrorIfNotExist) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20);
std::string_view ref_qualified_id_str = "namespace#uriA";
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
// Verify entry is not found in the beginning.
EXPECT_THAT(index->Get(target_info),
@@ -627,14 +632,14 @@ TEST_P(QualifiedIdJoinIndexTest, GetShouldReturnNotFoundErrorIfNotExist) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_P(QualifiedIdJoinIndexTest, SetLastAddedDocumentId) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, SetLastAddedDocumentId) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -648,15 +653,15 @@ TEST_P(QualifiedIdJoinIndexTest, SetLastAddedDocumentId) {
}
TEST_P(
- QualifiedIdJoinIndexTest,
+ QualifiedIdJoinIndexImplV1Test,
SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
constexpr DocumentId kDocumentId = 123;
index->set_last_added_document_id(kDocumentId);
@@ -669,14 +674,14 @@ TEST_P(
EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
}
-TEST_P(QualifiedIdJoinIndexTest, Optimize) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, Optimize) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
@@ -705,7 +710,8 @@ TEST_P(QualifiedIdJoinIndexTest, Optimize) {
DocumentId new_last_added_document_id = 2;
EXPECT_THAT(
- index->Optimize(document_id_old_to_new, new_last_added_document_id),
+ index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
+ new_last_added_document_id),
IsOk());
EXPECT_THAT(index, Pointee(SizeIs(3)));
EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
@@ -750,14 +756,14 @@ TEST_P(QualifiedIdJoinIndexTest, Optimize) {
IsOkAndHolds("namespace#uriD"));
}
-TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, OptimizeOutOfRangeDocumentId) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/10),
@@ -770,7 +776,7 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
// There shouldn't be any error due to vector index.
EXPECT_THAT(
- index->Optimize(document_id_old_to_new,
+ index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
/*new_last_added_document_id=*/kInvalidDocumentId),
IsOk());
EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -779,14 +785,14 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
EXPECT_THAT(index, Pointee(IsEmpty()));
}
-TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, OptimizeDeleteAll) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
@@ -809,7 +815,7 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
EXPECT_THAT(
- index->Optimize(document_id_old_to_new,
+ index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
/*new_last_added_document_id=*/kInvalidDocumentId),
IsOk());
EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -818,8 +824,8 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
EXPECT_THAT(index, Pointee(IsEmpty()));
}
-TEST_P(QualifiedIdJoinIndexTest, Clear) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, Clear) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/5);
@@ -827,10 +833,10 @@ TEST_P(QualifiedIdJoinIndexTest, Clear) {
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(target_info1, /*ref_qualified_id_str=*/"namespace#uriA"));
ICING_ASSERT_OK(
@@ -867,9 +873,9 @@ TEST_P(QualifiedIdJoinIndexTest, Clear) {
// Verify index after reconstructing.
ICING_ASSERT_OK_AND_ASSIGN(
- index, QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ index, QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
EXPECT_THAT(index->last_added_document_id(), Eq(2));
EXPECT_THAT(index->Get(target_info1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -880,16 +886,16 @@ TEST_P(QualifiedIdJoinIndexTest, Clear) {
EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
}
-TEST_P(QualifiedIdJoinIndexTest, SwitchKeyMapperTypeShouldReturnError) {
- const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, SwitchKeyMapperTypeShouldReturnError) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
{
// Create new qualified id join index
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinIndex> index,
- QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- param.use_persistent_hash_map));
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
ICING_ASSERT_OK(
index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
/*ref_qualified_id_str=*/"namespace#uriA"));
@@ -898,23 +904,26 @@ TEST_P(QualifiedIdJoinIndexTest, SwitchKeyMapperTypeShouldReturnError) {
}
bool switch_key_mapper_flag = !param.use_persistent_hash_map;
- EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
- param.pre_mapping_fbv,
- switch_key_mapper_flag),
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ switch_key_mapper_flag),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
INSTANTIATE_TEST_SUITE_P(
- QualifiedIdJoinIndexTest, QualifiedIdJoinIndexTest,
- testing::Values(
- QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true,
- /*use_persistent_hash_map_in=*/true),
- QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true,
- /*use_persistent_hash_map_in=*/false),
- QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false,
- /*use_persistent_hash_map_in=*/true),
- QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false,
- /*use_persistent_hash_map_in=*/false)));
+ QualifiedIdJoinIndexImplV1Test, QualifiedIdJoinIndexImplV1Test,
+ testing::Values(QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true),
+ QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false)));
} // namespace
diff --git a/icing/join/qualified-id-join-index-impl-v2.cc b/icing/join/qualified-id-join-index-impl-v2.cc
new file mode 100644
index 0000000..70fd13c
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2.cc
@@ -0,0 +1,681 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-accessor.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Set 1M for max # of qualified id entries and 10 bytes for key-value bytes.
+// This will take at most 23 MiB disk space and mmap for persistent hash map.
+static constexpr int32_t kSchemaJoinableIdToPostingListMapperMaxNumEntries =
+ 1 << 20;
+static constexpr int32_t kSchemaJoinableIdToPostingListMapperAverageKVByteSize =
+ 10;
+
+inline DocumentId GetNewDocumentId(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId old_document_id) {
+ if (old_document_id >= document_id_old_to_new.size()) {
+ return kInvalidDocumentId;
+ }
+ return document_id_old_to_new[old_document_id];
+}
+
+inline NamespaceId GetNewNamespaceId(
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ NamespaceId namespace_id) {
+ if (namespace_id >= namespace_id_old_to_new.size()) {
+ return kInvalidNamespaceId;
+ }
+ return namespace_id_old_to_new[namespace_id];
+}
+
+libtextclassifier3::StatusOr<PostingListIdentifier> GetPostingListIdentifier(
+ const KeyMapper<PostingListIdentifier>&
+ schema_joinable_id_to_posting_list_mapper,
+ const std::string& encoded_schema_type_joinable_property_id_str) {
+ auto posting_list_identifier_or =
+ schema_joinable_id_to_posting_list_mapper.Get(
+ encoded_schema_type_joinable_property_id_str);
+ if (!posting_list_identifier_or.ok()) {
+ if (absl_ports::IsNotFound(posting_list_identifier_or.status())) {
+ // Not found. Return invalid posting list id.
+ return PostingListIdentifier::kInvalid;
+ }
+ // Real error.
+ return posting_list_identifier_or;
+ }
+ return std::move(posting_list_identifier_or).ValueOrDie();
+}
+
+libtextclassifier3::StatusOr<std::string> EncodeSchemaTypeJoinablePropertyId(
+ SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id) {
+ if (schema_type_id < 0) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ if (!IsJoinablePropertyIdValid(joinable_property_id)) {
+ return absl_ports::InvalidArgumentError("Invalid joinable property id");
+ }
+
+ static constexpr int kEncodedSchemaTypeIdLength = 3;
+
+ // encoded_schema_type_id_str should be 1 to 3 bytes based on the value of
+ // schema_type_id.
+ std::string encoded_schema_type_id_str =
+ encode_util::EncodeIntToCString(schema_type_id);
+ // Make encoded_schema_type_id_str to fixed kEncodedSchemaTypeIdLength bytes.
+ while (encoded_schema_type_id_str.size() < kEncodedSchemaTypeIdLength) {
+ // C string cannot contain 0 bytes, so we append it using 1, just like what
+ // we do in encode_util::EncodeIntToCString.
+ //
+ // The reason that this works is because DecodeIntToString decodes a byte
+ // value of 0x01 as 0x00. When EncodeIntToCString returns an encoded
+ // schema type id that is less than 3 bytes, it means that the id contains
+ // unencoded leading 0x00. So here we're explicitly encoding those bytes as
+ // 0x01.
+ encoded_schema_type_id_str.push_back(1);
+ }
+
+ return absl_ports::StrCat(
+ encoded_schema_type_id_str,
+ encode_util::EncodeIntToCString(joinable_property_id));
+}
+
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/metadata");
+}
+
+std::string GetSchemaJoinableIdToPostingListMapperPath(
+ std::string_view working_path) {
+ return absl_ports::StrCat(working_path,
+ "/schema_joinable_id_to_posting_list_mapper");
+}
+
+std::string GetFlashIndexStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/flash_index_storage");
+}
+
+} // namespace
+
+libtextclassifier3::Status
+QualifiedIdJoinIndexImplV2::JoinDataIterator::Advance() {
+ if (pl_accessor_ == nullptr) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ if (!should_retrieve_next_batch_) {
+ // In this case, cached_batch_join_data_ is not empty (contains some data
+ // fetched in the previous round), so move curr_ to the next position and
+ // check if we have to fetch the next batch.
+ //
+ // Note: in the 1st round, should_retrieve_next_batch_ is true, so this part
+ // will never be executed.
+ ++curr_;
+ should_retrieve_next_batch_ = curr_ >= cached_batch_join_data_.cend();
+ }
+
+ if (should_retrieve_next_batch_) {
+ // Fetch next batch if needed.
+ ICING_RETURN_IF_ERROR(GetNextDataBatch());
+ should_retrieve_next_batch_ = false;
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status
+QualifiedIdJoinIndexImplV2::JoinDataIterator::GetNextDataBatch() {
+ auto cached_batch_join_data_or = pl_accessor_->GetNextDataBatch();
+ if (!cached_batch_join_data_or.ok()) {
+ ICING_LOG(WARNING)
+ << "Fail to get next batch data from posting list due to: "
+ << cached_batch_join_data_or.status().error_message();
+ return std::move(cached_batch_join_data_or).status();
+ }
+
+ cached_batch_join_data_ = std::move(cached_batch_join_data_or).ValueOrDie();
+ curr_ = cached_batch_join_data_.cbegin();
+
+ if (cached_batch_join_data_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::Create(const Filesystem& filesystem,
+ std::string working_path,
+ bool pre_mapping_fbv) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.DirectoryExists(
+ GetSchemaJoinableIdToPostingListMapperPath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetFlashIndexStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any file/directory is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv);
+}
+
+QualifiedIdJoinIndexImplV2::~QualifiedIdJoinIndexImplV2() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING) << "Failed to persist qualified id join index (v2) to "
+ "disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Put(
+ SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) {
+ std::sort(ref_namespace_fingerprint_ids.begin(),
+ ref_namespace_fingerprint_ids.end());
+
+ // Dedupe.
+ auto last = std::unique(ref_namespace_fingerprint_ids.begin(),
+ ref_namespace_fingerprint_ids.end());
+ ref_namespace_fingerprint_ids.erase(last,
+ ref_namespace_fingerprint_ids.end());
+ if (ref_namespace_fingerprint_ids.empty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ SetDirty();
+ ICING_ASSIGN_OR_RETURN(
+ std::string encoded_schema_type_joinable_property_id_str,
+ EncodeSchemaTypeJoinablePropertyId(schema_type_id, joinable_property_id));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier posting_list_identifier,
+ GetPostingListIdentifier(*schema_joinable_id_to_posting_list_mapper_,
+ encoded_schema_type_joinable_property_id_str));
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor;
+ if (posting_list_identifier.is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_.get(),
+ posting_list_identifier));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), posting_list_serializer_.get()));
+ }
+
+ // Prepend join data into posting list.
+ for (const NamespaceFingerprintIdentifier& ref_namespace_fingerprint_id :
+ ref_namespace_fingerprint_ids) {
+ ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ document_id, ref_namespace_fingerprint_id)));
+ }
+
+ // Finalize the posting list and update mapper.
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
+ ICING_RETURN_IF_ERROR(schema_joinable_id_to_posting_list_mapper_->Put(
+ encoded_schema_type_joinable_property_id_str, result.id));
+
+ // Update info.
+ info().num_data += ref_namespace_fingerprint_ids.size();
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase>>
+QualifiedIdJoinIndexImplV2::GetIterator(
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const {
+ ICING_ASSIGN_OR_RETURN(
+ std::string encoded_schema_type_joinable_property_id_str,
+ EncodeSchemaTypeJoinablePropertyId(schema_type_id, joinable_property_id));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier posting_list_identifier,
+ GetPostingListIdentifier(*schema_joinable_id_to_posting_list_mapper_,
+ encoded_schema_type_joinable_property_id_str));
+
+ if (!posting_list_identifier.is_valid()) {
+ return std::make_unique<JoinDataIterator>(nullptr);
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_.get(),
+ posting_list_identifier));
+
+ return std::make_unique<JoinDataIterator>(std::move(pl_accessor));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new qualified id join index "
+ "(v2)");
+ }
+
+ {
+ // Transfer all data from the current to new qualified id join index. Also
+ // PersistToDisk and destruct the instance after finishing, so we can safely
+ // swap directories later.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> new_index,
+ Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_));
+ ICING_RETURN_IF_ERROR(TransferIndex(
+ document_id_old_to_new, namespace_id_old_to_new, new_index.get()));
+ new_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_index->PersistToDisk());
+ }
+
+ // Destruct current index's storage instances to safely swap directories.
+ // TODO(b/268521214): handle delete propagation storage
+ schema_joinable_id_to_posting_list_mapper_.reset();
+ flash_index_storage_.reset();
+
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new qualified id join index (v2) due to failed swap");
+ }
+
+ // Reinitialize qualified id join index.
+ if (!filesystem_.PRead(GetMetadataFilePath(working_path_).c_str(),
+ metadata_buffer_.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ schema_joinable_id_to_posting_list_mapper_,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem_,
+ GetSchemaJoinableIdToPostingListMapperPath(working_path_),
+ pre_mapping_fbv_,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path_),
+ &filesystem_, posting_list_serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Clear() {
+ SetDirty();
+
+ schema_joinable_id_to_posting_list_mapper_.reset();
+ // Discard and reinitialize schema_joinable_id_to_posting_list_mapper.
+ std::string schema_joinable_id_to_posting_list_mapper_path =
+ GetSchemaJoinableIdToPostingListMapperPath(working_path_);
+ ICING_RETURN_IF_ERROR(
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Delete(
+ filesystem_, schema_joinable_id_to_posting_list_mapper_path));
+ ICING_ASSIGN_OR_RETURN(
+ schema_joinable_id_to_posting_list_mapper_,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem_,
+ std::move(schema_joinable_id_to_posting_list_mapper_path),
+ pre_mapping_fbv_,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+ // Discard and reinitialize flash_index_storage.
+ flash_index_storage_.reset();
+ if (!filesystem_.DeleteFile(
+ GetFlashIndexStorageFilePath(working_path_).c_str())) {
+ return absl_ports::InternalError("Fail to delete flash index storage file");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path_),
+ &filesystem_, posting_list_serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+
+ // TODO(b/268521214): clear delete propagation storage
+
+ info().num_data = 0;
+ info().last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ bool pre_mapping_fbv) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize schema_joinable_id_to_posting_list_mapper
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem, GetSchemaJoinableIdToPostingListMapperPath(working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+ // Initialize flash_index_storage
+ auto posting_list_serializer =
+ std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer.get()));
+
+ // Create instance.
+ auto new_join_index = std::unique_ptr<QualifiedIdJoinIndexImplV2>(
+ new QualifiedIdJoinIndexImplV2(
+ filesystem, std::move(working_path),
+ /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
+ std::move(schema_joinable_id_to_posting_list_mapper),
+ std::move(posting_list_serializer),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage)),
+ pre_mapping_fbv));
+ // Initialize info content.
+ new_join_index->info().magic = Info::kMagic;
+ new_join_index->info().num_data = 0;
+ new_join_index->info().last_added_document_id = kInvalidDocumentId;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_join_index->InitializeNewStorage());
+
+ return new_join_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv) {
+ // PRead metadata file.
+ auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
+ if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
+ metadata_buffer.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+
+ // Initialize schema_joinable_id_to_posting_list_mapper
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem, GetSchemaJoinableIdToPostingListMapperPath(working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+ // Initialize flash_index_storage
+ auto posting_list_serializer =
+ std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer.get()));
+
+ // Create instance.
+ auto join_index = std::unique_ptr<QualifiedIdJoinIndexImplV2>(
+ new QualifiedIdJoinIndexImplV2(
+ filesystem, std::move(working_path), std::move(metadata_buffer),
+ std::move(schema_joinable_id_to_posting_list_mapper),
+ std::move(posting_list_serializer),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage)),
+ pre_mapping_fbv));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(join_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (join_index->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return join_index;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ QualifiedIdJoinIndexImplV2* new_index) const {
+ std::unique_ptr<KeyMapper<PostingListIdentifier>::Iterator> iter =
+ schema_joinable_id_to_posting_list_mapper_->GetIterator();
+
+ // Iterate through all (schema_type_id, joinable_property_id).
+ while (iter->Advance()) {
+ PostingListIdentifier old_pl_id = iter->GetValue();
+ if (!old_pl_id.is_valid()) {
+ // Skip invalid posting list id.
+ continue;
+ }
+
+ // Read all join data from old posting lists and convert to new join data
+ // with new document id, namespace id.
+ std::vector<JoinDataType> new_join_data_vec;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>
+ old_pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_.get(),
+ old_pl_id));
+ ICING_ASSIGN_OR_RETURN(std::vector<JoinDataType> batch_old_join_data,
+ old_pl_accessor->GetNextDataBatch());
+ while (!batch_old_join_data.empty()) {
+ for (const JoinDataType& old_join_data : batch_old_join_data) {
+ DocumentId new_document_id = GetNewDocumentId(
+ document_id_old_to_new, old_join_data.document_id());
+ NamespaceId new_ref_namespace_id = GetNewNamespaceId(
+ namespace_id_old_to_new, old_join_data.join_info().namespace_id());
+
+ // Transfer if the document and namespace are not deleted or outdated.
+ if (new_document_id != kInvalidDocumentId &&
+ new_ref_namespace_id != kInvalidNamespaceId) {
+ // We can reuse the fingerprint from old_join_data, since document uri
+ // (and its fingerprint) will never change.
+ new_join_data_vec.push_back(JoinDataType(
+ new_document_id, NamespaceFingerprintIdentifier(
+ new_ref_namespace_id,
+ old_join_data.join_info().fingerprint())));
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(batch_old_join_data,
+ old_pl_accessor->GetNextDataBatch());
+ }
+
+ if (new_join_data_vec.empty()) {
+ continue;
+ }
+
+ // NamespaceId order may change, so we have to sort the vector.
+ std::sort(new_join_data_vec.begin(), new_join_data_vec.end());
+
+ // Create new posting list in new_index and prepend all new join data into
+ // it.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>
+ new_pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ new_index->flash_index_storage_.get(),
+ new_index->posting_list_serializer_.get()));
+ for (const JoinDataType& new_join_data : new_join_data_vec) {
+ ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(new_join_data));
+ }
+
+ // Finalize the posting list and update mapper of new_index.
+ PostingListAccessor::FinalizeResult result =
+ std::move(*new_pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError(
+ "Fail to flush data into posting list(s)");
+ }
+ ICING_RETURN_IF_ERROR(
+ new_index->schema_joinable_id_to_posting_list_mapper_->Put(
+ iter->GetKey(), result.id));
+
+ // Update info.
+ new_index->info().num_data += new_join_data_vec.size();
+ }
+
+ // TODO(b/268521214): transfer delete propagation storage
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::PersistMetadataToDisk(
+ bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+
+ ScopedFd sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open metadata file for write");
+ }
+
+ if (!filesystem_.PWrite(sfd.get(), /*offset=*/0, metadata_buffer_.get(),
+ kMetadataFileSize)) {
+ return absl_ports::InternalError("Fail to write metadata file");
+ }
+
+ if (!filesystem_.DataSync(sfd.get())) {
+ return absl_ports::InternalError("Fail to sync metadata to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(
+ schema_joinable_id_to_posting_list_mapper_->PersistToDisk());
+ if (!flash_index_storage_->PersistToDisk()) {
+ return absl_ports::InternalError(
+ "Fail to persist FlashIndexStorage to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV2::ComputeInfoChecksum(bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV2::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ Crc32 schema_joinable_id_to_posting_list_mapper_crc,
+ schema_joinable_id_to_posting_list_mapper_->ComputeChecksum());
+
+ return Crc32(schema_joinable_id_to_posting_list_mapper_crc.Get());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index-impl-v2.h b/icing/join/qualified-id-join-index-impl-v2.h
new file mode 100644
index 0000000..2b0bf3f
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2.h
@@ -0,0 +1,369 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-accessor.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndexImplV2: a class to maintain join data (DocumentId to
+// referenced NamespaceFingerprintIdentifier). It stores join data in posting
+// lists and bucketizes them by (schema_type_id, joinable_property_id).
+class QualifiedIdJoinIndexImplV2 : public QualifiedIdJoinIndex {
+ public:
+ using JoinDataType = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+ class JoinDataIterator : public JoinDataIteratorBase {
+ public:
+ explicit JoinDataIterator(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor)
+ : pl_accessor_(std::move(pl_accessor)),
+ should_retrieve_next_batch_(true) {}
+
+ ~JoinDataIterator() override = default;
+
+ // Advances to the next data.
+ //
+ // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
+ // - Any other PostingListJoinDataAccessor errors
+ libtextclassifier3::Status Advance() override;
+
+ const JoinDataType& GetCurrent() const override { return *curr_; }
+
+ private:
+ // Gets next batch of data from the posting list chain, caches in
+ // cached_batch_integer_index_data_, and sets curr_ to the begin of the
+ // cache.
+ libtextclassifier3::Status GetNextDataBatch();
+
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor_;
+ std::vector<JoinDataType> cached_batch_join_data_;
+ std::vector<JoinDataType>::const_iterator curr_;
+ bool should_retrieve_next_batch_;
+ };
+
+ struct Info {
+ static constexpr int32_t kMagic = 0x12d1c074;
+
+ int32_t magic;
+ int32_t num_data;
+ DocumentId last_added_document_id;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 12, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+ static constexpr int32_t kInfoMetadataBufferOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 24, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+
+ // Creates a QualifiedIdJoinIndexImplV2 instance to store join data
+ // (DocumentId to referenced NamespaceFingerPrintIdentifier) for future
+ // joining search. If any of the underlying file is missing, then delete the
+ // whole working_path and (re)initialize with new ones. Otherwise initialize
+ // and create the instance by existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // QualifiedIdJoinIndexImplV2 uses working path as working
+ // directory and all related files will be stored under this
+ // directory. It takes full ownership and of working_path_,
+ // including creation/deletion. It is the caller's
+ // responsibility to specify correct working path and avoid
+ // mixing different persistent storages together under the same
+ // path. Also the caller has the ownership for the parent
+ // directory of working_path_, and it is responsible for parent
+ // directory creation/deletion. See PersistentStorage for more
+ // details about the concept of working_path.
+ // pre_mapping_fbv: flag indicating whether memory map max possible file size
+ // for underlying FileBackedVector before growing the actual
+ // file size.
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum
+ // - INTERNAL_ERROR on I/O errors
+ // - Any KeyMapper errors
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv);
+
+ // Delete copy and move constructor/assignment operator.
+ QualifiedIdJoinIndexImplV2(const QualifiedIdJoinIndexImplV2&) = delete;
+ QualifiedIdJoinIndexImplV2& operator=(const QualifiedIdJoinIndexImplV2&) =
+ delete;
+
+ QualifiedIdJoinIndexImplV2(QualifiedIdJoinIndexImplV2&&) = delete;
+ QualifiedIdJoinIndexImplV2& operator=(QualifiedIdJoinIndexImplV2&&) = delete;
+
+ ~QualifiedIdJoinIndexImplV2() override;
+
+ // v1 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::Status Put(
+ const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str) override {
+ return absl_ports::UnimplementedError("This API is not supported in V2");
+ }
+
+ // v1 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const override {
+ return absl_ports::UnimplementedError("This API is not supported in V2");
+ }
+
+ // Puts a list of referenced (parent) NamespaceFingerprintIdentifiers into
+ // the join index, given the (child) DocumentId, SchemaTypeId and
+ // JoinablePropertyId.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
+ // document_id is invalid
+ // - Any KeyMapper/FlashIndexStorage errors
+ libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) override;
+
+ // Returns a JoinDataIterator for iterating through all join data of the
+ // specified (schema_type_id, joinable_property_id).
+ //
+ // Returns:
+ // - On success: a JoinDataIterator
+ // - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
+ // invalid
+ // - Any KeyMapper/FlashIndexStorage errors
+ libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+ GetIterator(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const override;
+
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Qualified id join index will convert all entries to the new
+ // document ids and namespace ids.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - namespace_id_old_to_new: a map for converting old namespace id to new
+ // namespace id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the qualified id join index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+ // an invalid state and the caller should handle it properly (e.g. discard
+ // and rebuild)
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ // Clears all data and set last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear() override;
+
+ bool is_v2() const override { return true; }
+
+ int32_t size() const override { return info().num_data; }
+
+ bool empty() const override { return size() == 0; }
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ private:
+ explicit QualifiedIdJoinIndexImplV2(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<uint8_t[]> metadata_buffer,
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper,
+ std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>>
+ posting_list_serializer,
+ std::unique_ptr<FlashIndexStorage> flash_index_storage,
+ bool pre_mapping_fbv)
+ : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
+ metadata_buffer_(std::move(metadata_buffer)),
+ schema_joinable_id_to_posting_list_mapper_(
+ std::move(schema_joinable_id_to_posting_list_mapper)),
+ posting_list_serializer_(std::move(posting_list_serializer)),
+ flash_index_storage_(std::move(flash_index_storage)),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv);
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path, bool pre_mapping_fbv);
+
+ // Transfers qualified id join index data from the current to new_index and
+ // convert to new document id according to document_id_old_to_new and
+ // namespace_id_old_to_new. It is a helper function for Optimize.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ QualifiedIdJoinIndexImplV2* new_index) const;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ // Metadata buffer
+ std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+ // Persistent KeyMapper for mapping (schema_type_id, joinable_property_id) to
+ // PostingListIdentifier.
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper_;
+
+ // Posting list related members. Use posting list to store join data
+ // (document id to referenced NamespaceFingerprintIdentifier).
+ std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>>
+ posting_list_serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+
+ // TODO(b/268521214): add delete propagation storage
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
diff --git a/icing/join/qualified-id-join-index-impl-v2_test.cc b/icing/join/qualified-id-join-index-impl-v2_test.cc
new file mode 100644
index 0000000..d73d6c2
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2_test.cc
@@ -0,0 +1,1414 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = QualifiedIdJoinIndexImplV2::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+
+class QualifiedIdJoinIndexImplV2Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/qualified_id_join_index_impl_v2_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
+libtextclassifier3::StatusOr<
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
+GetJoinData(const QualifiedIdJoinIndexImplV2& index,
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
+ index.GetIterator(schema_type_id, joinable_property_id));
+
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->GetCurrent());
+ }
+
+ return result;
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidWorkingPath) {
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(
+ filesystem_, "/dev/null/qualified_id_join_index_impl_v2_test",
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InitializeNewFiles) {
+ {
+ // Create new qualified id join index
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Check info section
+ const Info* info = reinterpret_cast<const Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+ EXPECT_THAT(info->magic, Eq(Info::kMagic));
+ EXPECT_THAT(info->num_data, Eq(0));
+ EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
+
+ // Check crcs section
+ const Crcs* crcs = reinterpret_cast<const Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+ // There are some initial info in KeyMapper, so storages_crc should be
+ // non-zero.
+ EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs->component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs->all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs->component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ // Insert some data.
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+// Verifies that an explicit PersistToDisk syncs all checksums, allowing a
+// second instance to initialize over the same files and read back identical
+// contents.
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializationShouldSucceedWithPersistToDisk) {
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+  // Create new qualified id join index
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index1,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  // Insert some data. Size is 4 because the first Put adds 2 join infos.
+  ICING_ASSERT_OK(index1->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+      /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+  ICING_ASSERT_OK(index1->Put(
+      /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+      /*ref_namespace_fingerprint_ids=*/{id3}));
+  ICING_ASSERT_OK(index1->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+      /*ref_namespace_fingerprint_ids=*/{id4}));
+  ASSERT_THAT(index1, Pointee(SizeIs(4)));
+
+  // After calling PersistToDisk, all checksums should be recomputed and synced
+  // correctly to disk, so initializing another instance on the same files
+  // should succeed, and we should be able to get the same contents.
+  ICING_EXPECT_OK(index1->PersistToDisk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index2,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(index2, Pointee(SizeIs(4)));
+  EXPECT_THAT(
+      GetJoinData(*index2, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/12, /*join_info=*/id4),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/5, /*join_info=*/id2),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/5, /*join_info=*/id1))));
+  EXPECT_THAT(
+      GetJoinData(*index2, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/6, /*join_info=*/id3))));
+}
+
+// Verifies that destructing an instance (without an explicit PersistToDisk)
+// still leaves files in a state a new instance can initialize from, with the
+// same contents.
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializationShouldSucceedAfterDestruction) {
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+
+    // Insert some data.
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+        /*ref_namespace_fingerprint_ids=*/{id3}));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+        /*ref_namespace_fingerprint_ids=*/{id4}));
+    ASSERT_THAT(index, Pointee(SizeIs(4)));
+  }
+
+  {
+    // The previous instance went out of scope and was destructed. Although we
+    // didn't call PersistToDisk explicitly, the destructor should invoke it and
+    // thus initializing another instance on the same files should succeed, and
+    // we should be able to get the same contents.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    EXPECT_THAT(index, Pointee(SizeIs(4)));
+    EXPECT_THAT(
+        GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+        IsOkAndHolds(
+            ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                            /*document_id=*/12, /*join_info=*/id4),
+                        DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                            /*document_id=*/5, /*join_info=*/id2),
+                        DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                            /*document_id=*/5, /*join_info=*/id1))));
+    EXPECT_THAT(
+        GetJoinData(*index, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+        IsOkAndHolds(
+            ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                            /*document_id=*/6, /*join_info=*/id3))));
+  }
+}
+
+// Verifies that a metadata file whose magic value was changed (with checksums
+// recomputed to stay self-consistent) is rejected at initialization.
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializeExistingFilesWithDifferentMagicShouldFail) {
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  {
+    const std::string metadata_file_path =
+        absl_ports::StrCat(working_path_, "/metadata");
+    ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+    ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+    ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+                                  QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                                  /*offset=*/0),
+                IsTrue());
+
+    // Manually change magic and update checksum. Checksums are kept valid on
+    // purpose so that only the magic check can trigger the failure.
+    Crcs* crcs = reinterpret_cast<Crcs*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+    Info* info = reinterpret_cast<Info*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+    info->magic += kCorruptedValueOffset;
+    crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+    crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+                IsTrue());
+  }
+
+  // Attempt to create the qualified id join index with different magic. This
+  // should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Incorrect magic value")));
+}
+
+// Verifies that corrupting the top-level all_crc in the metadata file makes
+// initialization fail with "Invalid all crc".
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializeExistingFilesWithWrongAllCrcShouldFail) {
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  {
+    const std::string metadata_file_path =
+        absl_ports::StrCat(working_path_, "/metadata");
+    ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+    ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+    ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+                                  QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                                  /*offset=*/0),
+                IsTrue());
+
+    // Manually corrupt all_crc
+    Crcs* crcs = reinterpret_cast<Crcs*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+    crcs->all_crc += kCorruptedValueOffset;
+
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+                IsTrue());
+  }
+
+  // Attempt to create the qualified id join index with metadata containing
+  // corrupted all_crc. This should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Invalid all crc")));
+}
+
+// Verifies that mutating the Info section without refreshing its checksum
+// (simulating on-disk corruption) makes initialization fail with
+// "Invalid info crc".
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializeExistingFilesWithCorruptedInfoShouldFail) {
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  {
+    const std::string metadata_file_path =
+        absl_ports::StrCat(working_path_, "/metadata");
+    ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+    ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+    ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+                                  QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                                  /*offset=*/0),
+                IsTrue());
+
+    // Modify info, but don't update the checksum. This would be similar to
+    // corruption of info.
+    Info* info = reinterpret_cast<Info*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+    info->last_added_document_id += kCorruptedValueOffset;
+
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+                IsTrue());
+  }
+
+  // Attempt to create the qualified id join index with info that doesn't match
+  // its checksum. This should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Invalid info crc")));
+}
+
+// Verifies that corrupting the schema_joinable_id_to_posting_list_mapper
+// storage (changing its contents out-of-band so its checksum diverges from
+// the persisted one) makes initialization fail with "Invalid storages crc".
+TEST_F(
+    QualifiedIdJoinIndexImplV2Test,
+    InitializeExistingFilesWithCorruptedSchemaJoinableIdToPostingListMapperShouldFail) {
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // Corrupt schema_joinable_id_to_posting_list_mapper manually.
+  {
+    std::string mapper_working_path = absl_ports::StrCat(
+        working_path_, "/schema_joinable_id_to_posting_list_mapper");
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<KeyMapper<PostingListIdentifier>> mapper,
+        PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+            filesystem_, std::move(mapper_working_path),
+            /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
+    ICING_ASSERT_OK(mapper->Put("foo", PostingListIdentifier::kInvalid));
+    ICING_ASSERT_OK(mapper->PersistToDisk());
+    ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, mapper->ComputeChecksum());
+    ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+  }
+
+  // Attempt to create the qualified id join index with corrupted
+  // schema_joinable_id_to_posting_list_mapper. This should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Invalid storages crc")));
+}
+
+// Verifies Put rejects invalid arguments: negative schema_type_id, negative
+// joinable_property_id, and kInvalidDocumentId.
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidPut) {
+  NamespaceFingerprintIdentifier id(/*namespace_id=*/1, /*fingerprint=*/12);
+
+  // Create new qualified id join index
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(
+      index->Put(/*schema_type_id=*/-1, /*joinable_property_id=*/1,
+                 /*document_id=*/5, /*ref_namespace_fingerprint_ids=*/{id}),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(
+      index->Put(/*schema_type_id=*/2, /*joinable_property_id=*/-1,
+                 /*document_id=*/5, /*ref_namespace_fingerprint_ids=*/{id}),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(index->Put(/*schema_type_id=*/2, /*joinable_property_id=*/1,
+                         /*document_id=*/kInvalidDocumentId,
+                         /*ref_namespace_fingerprint_ids=*/{id}),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Verifies GetIterator rejects negative schema_type_id and negative
+// joinable_property_id.
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidGetIterator) {
+  // Create new qualified id join index
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(
+      index->GetIterator(/*schema_type_id=*/-1, /*joinable_property_id=*/1),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(
+      index->GetIterator(/*schema_type_id=*/2, /*joinable_property_id=*/-1),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Verifies that Put with an empty ref_namespace_fingerprint_ids list succeeds
+// as a no-op: the index stays empty and lookups on any key return nothing.
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       PutEmptyRefNamespaceFingerprintIdsShouldReturnOk) {
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  // Create new qualified id join index
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{}),
+      IsOk());
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+              IsOkAndHolds(IsEmpty()));
+}
+
+// Verifies Put/Get round trip for a single (schema_type_id,
+// joinable_property_id) key, both in-memory and after persist + re-create.
+// NOTE(review): expected order is by descending document id, and within the
+// same document apparently by descending join info (id1 before id2 here) —
+// presumably a posting-list ordering property; confirm against the
+// implementation.
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       PutAndGetSingleSchemaTypeAndJoinableProperty) {
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/78);
+
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+
+    EXPECT_THAT(
+        index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                   /*ref_namespace_fingerprint_ids=*/{id2, id1}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id, joinable_property_id, /*document_id=*/6,
+                   /*ref_namespace_fingerprint_ids=*/{id3}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id, joinable_property_id, /*document_id=*/12,
+                   /*ref_namespace_fingerprint_ids=*/{id4}),
+        IsOk());
+    EXPECT_THAT(index, Pointee(SizeIs(4)));
+
+    EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/12, /*join_info=*/id4),
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/6, /*join_info=*/id3),
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id1),
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id2))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+                IsOkAndHolds(IsEmpty()));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+                IsOkAndHolds(IsEmpty()));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // Verify we can get all of them after destructing and re-initializing.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/12, /*join_info=*/id4),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/6, /*join_info=*/id3),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id1),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id2))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+              IsOkAndHolds(IsEmpty()));
+}
+
+// Verifies that data stored under distinct (schema_type_id,
+// joinable_property_id) keys stays isolated per key, both in-memory and after
+// persist + re-create.
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       PutAndGetMultipleSchemaTypesAndJoinableProperties) {
+  SchemaTypeId schema_type_id1 = 2;
+  SchemaTypeId schema_type_id2 = 4;
+
+  JoinablePropertyId joinable_property_id1 = 1;
+  JoinablePropertyId joinable_property_id2 = 10;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/78);
+
+  {
+    // Create new qualified id join index
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+
+    EXPECT_THAT(
+        index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/5,
+                   /*ref_namespace_fingerprint_ids=*/{id1}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id1, joinable_property_id2, /*document_id=*/5,
+                   /*ref_namespace_fingerprint_ids=*/{id2}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id2, joinable_property_id1, /*document_id=*/12,
+                   /*ref_namespace_fingerprint_ids=*/{id3}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/12,
+                   /*ref_namespace_fingerprint_ids=*/{id4}),
+        IsOk());
+    EXPECT_THAT(index, Pointee(SizeIs(4)));
+
+    EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id1),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id1))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id2),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id2))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id1),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/12, /*join_info=*/id3))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id2),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/12, /*join_info=*/id4))));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // Verify we can get all of them after destructing and re-initializing.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id1),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id1))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id2),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id2))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id1),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/12, /*join_info=*/id3))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id2),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/12, /*join_info=*/id4))));
+}
+
+// Verifies last_added_document_id starts at kInvalidDocumentId and tracks
+// successive, increasing set_last_added_document_id calls.
+TEST_F(QualifiedIdJoinIndexImplV2Test, SetLastAddedDocumentId) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  constexpr DocumentId kDocumentId = 100;
+  index->set_last_added_document_id(kDocumentId);
+  EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+  constexpr DocumentId kNextDocumentId = 123;
+  index->set_last_added_document_id(kNextDocumentId);
+  EXPECT_THAT(index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+// Verifies set_last_added_document_id ignores a value that is not strictly
+// greater than the current last_added_document_id.
+TEST_F(
+    QualifiedIdJoinIndexImplV2Test,
+    SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  constexpr DocumentId kDocumentId = 123;
+  index->set_last_added_document_id(kDocumentId);
+  ASSERT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+  constexpr DocumentId kNextDocumentId = 100;
+  ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+  index->set_last_added_document_id(kNextDocumentId);
+  // last_added_document_id() should remain unchanged.
+  EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+}
+
+// End-to-end Optimize() test: compacts document ids AND namespace ids at
+// once, drops entries whose document or referenced namespace was deleted,
+// remaps the survivors, and verifies Get/Put still work afterwards.
+TEST_F(QualifiedIdJoinIndexImplV2Test, Optimize) {
+  // General test for Optimize().
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id1 = 2;
+  SchemaTypeId schema_type_id2 = 5;
+
+  JoinablePropertyId joinable_property_id1 = 11;
+  JoinablePropertyId joinable_property_id2 = 15;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/2, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/3, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/4, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/104);
+  NamespaceFingerprintIdentifier id5(/*namespace_id=*/0, /*fingerprint=*/105);
+  NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+  NamespaceFingerprintIdentifier id7(/*namespace_id=*/3, /*fingerprint=*/107);
+  NamespaceFingerprintIdentifier id8(/*namespace_id=*/2, /*fingerprint=*/108);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/3,
+                 /*ref_namespace_fingerprint_ids=*/{id1, id2, id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/8,
+                 /*ref_namespace_fingerprint_ids=*/{id5, id6}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/13,
+                 /*ref_namespace_fingerprint_ids=*/{id7}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/21,
+                 /*ref_namespace_fingerprint_ids=*/{id8}),
+      IsOk());
+  index->set_last_added_document_id(21);
+
+  ASSERT_THAT(index, Pointee(SizeIs(8)));
+
+  // Delete doc id = 5, 13, compress and keep the rest.
+  std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+  document_id_old_to_new[3] = 0;
+  document_id_old_to_new[8] = 1;
+  document_id_old_to_new[21] = 2;
+
+  // Delete namespace id 1, 2 (and invalidate id1, id6, id8). Reorder namespace
+  // ids [0, 3, 4] to [1, 2, 0].
+  std::vector<NamespaceId> namespace_id_old_to_new(5, kInvalidNamespaceId);
+  namespace_id_old_to_new[0] = 1;
+  namespace_id_old_to_new[3] = 2;
+  namespace_id_old_to_new[4] = 0;
+
+  // 8 entries minus 2 on deleted docs (5, 13) minus 3 on deleted namespaces
+  // (id1, id6, id8) leaves 3.
+  DocumentId new_last_added_document_id = 2;
+  EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                              new_last_added_document_id),
+              IsOk());
+  EXPECT_THAT(index, Pointee(SizeIs(3)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+  // Verify GetIterator API should work normally after Optimize().
+  // 1) schema_type_id1, joinable_property_id1:
+  //    - old_doc_id=21, old_ref_namespace_id=2: NOT FOUND
+  //    - old_doc_id=13, old_ref_namespace_id=3: NOT FOUND
+  //    - old_doc_id=3, old_ref_namespace_id=4:
+  //      become new_doc_id=0, new_ref_namespace_id=0
+  //    - old_doc_id=3, old_ref_namespace_id=3:
+  //      become new_doc_id=0, new_ref_namespace_id=2
+  //    - old_doc_id=3, old_ref_namespace_id=2: NOT FOUND
+  //
+  //    For new_doc_id=0, it should reorder due to posting list restriction.
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id1, joinable_property_id1),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/102)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/103)))));
+
+  // 2) schema_type_id2, joinable_property_id2:
+  //    - old_doc_id=8, old_ref_namespace_id=1: NOT FOUND
+  //    - old_doc_id=8, old_ref_namespace_id=0:
+  //      become new_doc_id=1, new_ref_namespace_id=1
+  //    - old_doc_id=5, old_ref_namespace_id=0: NOT FOUND
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id2, joinable_property_id2),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/1, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/1, /*fingerprint=*/105)))));
+
+  // Verify Put API should work normally after Optimize().
+  NamespaceFingerprintIdentifier id9(/*namespace_id=*/1, /*fingerprint=*/109);
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id9}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(99));
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id1, joinable_property_id1),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/99, /*join_info=*/id9),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/102)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/103)))));
+}
+
+// Optimize() test focused on document-id compaction only (namespace ids are
+// left unchanged): deleted documents drop their entries, survivors are
+// remapped, and Get/Put keep working afterwards.
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDocumentIdChange) {
+  // Specific test for Optimize(): document id compaction.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+  NamespaceFingerprintIdentifier id5(/*namespace_id=*/1, /*fingerprint=*/105);
+  NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+                 /*ref_namespace_fingerprint_ids=*/{id1, id2}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+                 /*ref_namespace_fingerprint_ids=*/{id5}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+                 /*ref_namespace_fingerprint_ids=*/{id6}),
+      IsOk());
+  index->set_last_added_document_id(21);
+
+  ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+  // Delete doc id = 5, 8, compress and keep the rest.
+  std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+  document_id_old_to_new[3] = 0;
+  document_id_old_to_new[13] = 1;
+  document_id_old_to_new[21] = 2;
+
+  // No change for namespace id.
+  std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+  DocumentId new_last_added_document_id = 2;
+  EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                              new_last_added_document_id),
+              IsOk());
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+  // Verify GetIterator API should work normally after Optimize().
+  // - old_doc_id=21, join_info=id6: become doc_id=2, join_info=id6
+  // - old_doc_id=13, join_info=id5: become doc_id=1, join_info=id5
+  // - old_doc_id=8, join_info=id4: NOT FOUND
+  // - old_doc_id=5, join_info=id3: NOT FOUND
+  // - old_doc_id=3, join_info=id2: become doc_id=0, join_info=id2
+  // - old_doc_id=3, join_info=id1: become doc_id=0, join_info=id1
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/2, /*join_info=*/id6),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/1, /*join_info=*/id5),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id2),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id1))));
+
+  // Verify Put API should work normally after Optimize().
+  NamespaceFingerprintIdentifier id7(/*namespace_id=*/1, /*fingerprint=*/107);
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id7}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  EXPECT_THAT(index, Pointee(SizeIs(5)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(99));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/99, /*join_info=*/id7),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/2, /*join_info=*/id6),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/1, /*join_info=*/id5),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id2),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id1))));
+}
+
+// Verifies Optimize() handles a document id beyond the bounds of
+// document_id_old_to_new without error, treating it as deleted.
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeOutOfRangeDocumentId) {
+  // Specific test for Optimize() for out of range document id.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+  NamespaceFingerprintIdentifier id(/*namespace_id=*/1, /*fingerprint=*/101);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  // Create document_id_old_to_new with size = 1. Optimize should handle out of
+  // range DocumentId properly.
+  std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId};
+  std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+  // There shouldn't be any error due to vector index.
+  EXPECT_THAT(
+      index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                      /*new_last_added_document_id=*/kInvalidDocumentId),
+      IsOk());
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  // Verify all data are discarded after Optimize().
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDeleteAllDocuments) {
+ // Specific test for Optimize(): delete all document ids.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+ NamespaceFingerprintIdentifier id5(/*namespace_id=*/1, /*fingerprint=*/105);
+ NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+ /*ref_namespace_fingerprint_ids=*/{id1, id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+ /*ref_namespace_fingerprint_ids=*/{id5}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+ /*ref_namespace_fingerprint_ids=*/{id6}),
+ IsOk());
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+
+ // No change for namespace id.
+ std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeNamespaceIdChange) {
+ // Specific test for Optimize(): referenced namespace id compaction.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/5, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/4, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/104);
+ NamespaceFingerprintIdentifier id5(/*namespace_id=*/2, /*fingerprint=*/105);
+ NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/2,
+ /*ref_namespace_fingerprint_ids=*/{id1}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+ /*ref_namespace_fingerprint_ids=*/{id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+ /*ref_namespace_fingerprint_ids=*/{id5}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+ /*ref_namespace_fingerprint_ids=*/{id6}),
+ IsOk());
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+ // No change for document id.
+ std::vector<DocumentId> document_id_old_to_new(22);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Delete namespace id 2, 4. Reorder namespace id [0, 1, 3, 5] to [2, 3, 1,
+ // 0].
+ std::vector<NamespaceId> namespace_id_old_to_new(6, kInvalidNamespaceId);
+ namespace_id_old_to_new[0] = 2;
+ namespace_id_old_to_new[1] = 3;
+ namespace_id_old_to_new[3] = 1;
+ namespace_id_old_to_new[5] = 0;
+
+ DocumentId new_last_added_document_id = 21;
+ EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify GetIterator API should work normally after Optimize().
+ // - id6 (old_namespace_id=1): new_namespace_id=3 (document_id = 21)
+ // - id5 (old_namespace_id=2): NOT FOUND
+ // - id4 (old_namespace_id=0): new_namespace_id=2 (document_id = 8)
+ // - id3 (old_namespace_id=4): NOT FOUND
+ // - id2 (old_namespace_id=5): new_namespace_id=0 (document_id = 3)
+ // - id1 (old_namespace_id=3): new_namespace_id=1 (document_id = 2)
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/21, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/3, /*fingerprint=*/106)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/8, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/104)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/102)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/101)))));
+
+ // Verify Put API should work normally after Optimize().
+ NamespaceFingerprintIdentifier id7(/*namespace_id=*/1, /*fingerprint=*/107);
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+ /*ref_namespace_fingerprint_ids=*/{id7}),
+ IsOk());
+ index->set_last_added_document_id(99);
+
+ EXPECT_THAT(index, Pointee(SizeIs(5)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(99));
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/99, /*join_info=*/id7),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/21, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/3, /*fingerprint=*/106)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/8, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/104)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/102)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/101)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeNamespaceIdChangeShouldReorder) {
+ // Specific test for Optimize(): referenced namespace id reorder.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/0, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/{id1, id2, id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/1,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ index->set_last_added_document_id(1);
+
+ ASSERT_THAT(index, Pointee(SizeIs(4)));
+
+ // No change for document id.
+ std::vector<DocumentId> document_id_old_to_new = {0, 1};
+
+ // Reorder namespace id [0, 1, 2] to [2, 0, 1].
+ std::vector<NamespaceId> namespace_id_old_to_new = {2, 0, 1};
+
+ DocumentId new_last_added_document_id = 1;
+ EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify GetIterator API should work normally after Optimize().
+ // - id4 (old_namespace_id=1): new_namespace_id=0 (document_id = 1)
+ // - id3 (old_namespace_id=2): new_namespace_id=1 (document_id = 0)
+ // - id2 (old_namespace_id=1): new_namespace_id=0 (document_id = 0)
+ // - id1 (old_namespace_id=0): new_namespace_id=2 (document_id = 0)
+ //
+ // Should reorder to [id4, id1, id3, id2] due to posting list restriction.
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/104)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/101)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/103)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/102)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeOutOfRangeNamespaceId) {
+ // Specific test for Optimize(): out of range referenced namespace id.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+ NamespaceFingerprintIdentifier id(/*namespace_id=*/99, /*fingerprint=*/101);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/{id}),
+ IsOk());
+ index->set_last_added_document_id(0);
+
+ // Create namespace_id_old_to_new with size = 1. Optimize should handle out of
+ // range NamespaceId properly.
+ std::vector<DocumentId> document_id_old_to_new = {0};
+ std::vector<NamespaceId> namespace_id_old_to_new = {kInvalidNamespaceId};
+
+ // There shouldn't be any error due to vector index.
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDeleteAllNamespaces) {
+ // Specific test for Optimize(): delete all referenced namespace ids.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/0, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/103);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/{id1}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/1,
+ /*ref_namespace_fingerprint_ids=*/{id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/2,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ index->set_last_added_document_id(3);
+
+ ASSERT_THAT(index, Pointee(SizeIs(3)));
+
+ // No change for document id.
+ std::vector<DocumentId> document_id_old_to_new = {0, 1, 2};
+
+ // Delete all namespaces.
+ std::vector<NamespaceId> namespace_id_old_to_new(3, kInvalidNamespaceId);
+
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, Clear) {
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ // Insert some data.
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}));
+ ASSERT_THAT(index, Pointee(SizeIs(4)));
+ index->set_last_added_document_id(12);
+ ASSERT_THAT(index->last_added_document_id(), Eq(12));
+
+ // After Clear(), last_added_document_id should be set to kInvalidDocumentId,
+ // and the previous added data should be deleted.
+ EXPECT_THAT(index->Clear(), IsOk());
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+ IsOkAndHolds(IsEmpty()));
+
+ // Join index should be able to work normally after Clear().
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/20,
+ /*ref_namespace_fingerprint_ids=*/{id4, id2, id1, id3}));
+ index->set_last_added_document_id(20);
+
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(20));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id3),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id1))));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ index.reset();
+
+ // Verify index after reconstructing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index->last_added_document_id(), Eq(20));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id3),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id1))));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index.h b/icing/join/qualified-id-join-index.h
index 86297cd..4e487f9 100644
--- a/icing/join/qualified-id-join-index.h
+++ b/icing/join/qualified-id-join-index.h
@@ -19,81 +19,42 @@
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/persistent-storage.h"
#include "icing/join/doc-join-info.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
-#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
#include "icing/util/crc32.h"
namespace icing {
namespace lib {
-// QualifiedIdJoinIndex: a class to maintain data mapping DocJoinInfo to
-// joinable qualified ids and delete propagation info.
+// QualifiedIdJoinIndex: an abstract class to maintain data for qualified id
+// joining.
class QualifiedIdJoinIndex : public PersistentStorage {
public:
- struct Info {
- static constexpr int32_t kMagic = 0x48cabdc6;
+ class JoinDataIteratorBase {
+ public:
+ virtual ~JoinDataIteratorBase() = default;
- int32_t magic;
- DocumentId last_added_document_id;
+ virtual libtextclassifier3::Status Advance() = 0;
- Crc32 ComputeChecksum() const {
- return Crc32(
- std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
- }
- } __attribute__((packed));
- static_assert(sizeof(Info) == 8, "");
-
- // Metadata file layout: <Crcs><Info>
- static constexpr int32_t kCrcsMetadataBufferOffset = 0;
- static constexpr int32_t kInfoMetadataBufferOffset =
- static_cast<int32_t>(sizeof(Crcs));
- static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
- static_assert(kMetadataFileSize == 20, "");
+ virtual const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>&
+ GetCurrent() const = 0;
+ };
static constexpr WorkingPathType kWorkingPathType =
WorkingPathType::kDirectory;
- // Creates a QualifiedIdJoinIndex instance to store qualified ids for future
- // joining search. If any of the underlying file is missing, then delete the
- // whole working_path and (re)initialize with new ones. Otherwise initialize
- // and create the instance by existing files.
- //
- // filesystem: Object to make system level calls
- // working_path: Specifies the working path for PersistentStorage.
- // QualifiedIdJoinIndex uses working path as working directory
- // and all related files will be stored under this directory. It
- // takes full ownership and of working_path_, including
- // creation/deletion. It is the caller's responsibility to
- // specify correct working path and avoid mixing different
- // persistent storages together under the same path. Also the
- // caller has the ownership for the parent directory of
- // working_path_, and it is responsible for parent directory
- // creation/deletion. See PersistentStorage for more details
- // about the concept of working_path.
- // pre_mapping_fbv: flag indicating whether memory map max possible file size
- // for underlying FileBackedVector before growing the actual
- // file size.
- // use_persistent_hash_map: flag indicating whether use persistent hash map as
- // the key mapper (if false, then fall back to
- // dynamic trie key mapper).
- //
- // Returns:
- // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
- // checksum
- // - INTERNAL_ERROR on I/O errors
- // - Any KeyMapper errors
- static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
- Create(const Filesystem& filesystem, std::string working_path,
- bool pre_mapping_fbv, bool use_persistent_hash_map);
-
// Deletes QualifiedIdJoinIndex under working_path.
//
// Returns:
@@ -105,17 +66,11 @@ class QualifiedIdJoinIndex : public PersistentStorage {
kWorkingPathType);
}
- // Delete copy and move constructor/assignment operator.
- QualifiedIdJoinIndex(const QualifiedIdJoinIndex&) = delete;
- QualifiedIdJoinIndex& operator=(const QualifiedIdJoinIndex&) = delete;
-
- QualifiedIdJoinIndex(QualifiedIdJoinIndex&&) = delete;
- QualifiedIdJoinIndex& operator=(QualifiedIdJoinIndex&&) = delete;
+ virtual ~QualifiedIdJoinIndex() override = default;
- ~QualifiedIdJoinIndex() override;
-
- // Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
- // references to ref_qualified_id_str (the identifier of another document).
+ // (v1 only) Puts a new data into index: DocJoinInfo (DocumentId,
+ // JoinablePropertyId) references to ref_qualified_id_str (the identifier of
+ // another document).
//
// REQUIRES: ref_qualified_id_str contains no '\0'.
//
@@ -123,10 +78,26 @@ class QualifiedIdJoinIndex : public PersistentStorage {
// - OK on success
// - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
// - Any KeyMapper errors
- libtextclassifier3::Status Put(const DocJoinInfo& doc_join_info,
- std::string_view ref_qualified_id_str);
+ virtual libtextclassifier3::Status Put(
+ const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str) = 0;
- // Gets the referenced document's qualified id string by DocJoinInfo.
+ // (v2 only) Puts a list of referenced NamespaceFingerprintIdentifier into
+ // index, given the DocumentId, SchemaTypeId and JoinablePropertyId.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
+ // document_id is invalid
+ // - Any KeyMapper/FlashIndexStorage errors
+ virtual libtextclassifier3::Status Put(
+ SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) = 0;
+
+ // (v1 only) Gets the referenced document's qualified id string by
+ // DocJoinInfo.
//
// Returns:
// - A qualified id string referenced by the given DocJoinInfo (DocumentId,
@@ -134,8 +105,20 @@ class QualifiedIdJoinIndex : public PersistentStorage {
// - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
// - NOT_FOUND_ERROR if doc_join_info doesn't exist
// - Any KeyMapper errors
- libtextclassifier3::StatusOr<std::string_view> Get(
- const DocJoinInfo& doc_join_info) const;
+ virtual libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const = 0;
+
+ // (v2 only) Returns a JoinDataIterator for iterating through all join data of
+ // the specified (schema_type_id, joinable_property_id).
+ //
+ // Returns:
+ // - On success: a JoinDataIterator
+ // - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
+ // invalid
+ // - Any KeyMapper/FlashIndexStorage errors
+ virtual libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+ GetIterator(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const = 0;
// Reduces internal file sizes by reclaiming space and ids of deleted
// documents. Qualified id type joinable index will convert all entries to the
@@ -143,6 +126,8 @@ class QualifiedIdJoinIndex : public PersistentStorage {
//
// - document_id_old_to_new: a map for converting old document id to new
// document id.
+ // - namespace_id_old_to_new: a map for converting old namespace id to new
+ // namespace id.
// - new_last_added_document_id: will be used to update the last added
// document id in the qualified id type joinable
// index.
@@ -152,154 +137,48 @@ class QualifiedIdJoinIndex : public PersistentStorage {
// - INTERNAL_ERROR on I/O error. This could potentially leave the index in
// an invalid state and the caller should handle it properly (e.g. discard
// and rebuild)
- libtextclassifier3::Status Optimize(
+ virtual libtextclassifier3::Status Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
- DocumentId new_last_added_document_id);
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) = 0;
// Clears all data and set last_added_document_id to kInvalidDocumentId.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status Clear();
+ virtual libtextclassifier3::Status Clear() = 0;
- int32_t size() const { return doc_join_info_mapper_->num_keys(); }
+ virtual bool is_v2() const = 0;
- bool empty() const { return size() == 0; }
+ virtual int32_t size() const = 0;
- DocumentId last_added_document_id() const {
- return info().last_added_document_id;
- }
+ virtual bool empty() const = 0;
- void set_last_added_document_id(DocumentId document_id) {
- SetInfoDirty();
+ virtual DocumentId last_added_document_id() const = 0;
- Info& info_ref = info();
- if (info_ref.last_added_document_id == kInvalidDocumentId ||
- document_id > info_ref.last_added_document_id) {
- info_ref.last_added_document_id = document_id;
- }
- }
+ virtual void set_last_added_document_id(DocumentId document_id) = 0;
- private:
- explicit QualifiedIdJoinIndex(
- const Filesystem& filesystem, std::string&& working_path,
- std::unique_ptr<uint8_t[]> metadata_buffer,
- std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
- std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
- bool pre_mapping_fbv, bool use_persistent_hash_map)
+ protected:
+ explicit QualifiedIdJoinIndex(const Filesystem& filesystem,
+ std::string&& working_path)
: PersistentStorage(filesystem, std::move(working_path),
- kWorkingPathType),
- metadata_buffer_(std::move(metadata_buffer)),
- doc_join_info_mapper_(std::move(doc_join_info_mapper)),
- qualified_id_storage_(std::move(qualified_id_storage)),
- pre_mapping_fbv_(pre_mapping_fbv),
- use_persistent_hash_map_(use_persistent_hash_map),
- is_info_dirty_(false),
- is_storage_dirty_(false) {}
-
- static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
- InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
- bool pre_mapping_fbv, bool use_persistent_hash_map);
-
- static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
- InitializeExistingFiles(const Filesystem& filesystem,
- std::string&& working_path, bool pre_mapping_fbv,
- bool use_persistent_hash_map);
-
- // Transfers qualified id type joinable index data from the current to
- // new_index and convert to new document id according to
- // document_id_old_to_new. It is a helper function for Optimize.
- //
- // Returns:
- // - OK on success
- // - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status TransferIndex(
- const std::vector<DocumentId>& document_id_old_to_new,
- QualifiedIdJoinIndex* new_index) const;
-
- // Flushes contents of metadata file.
- //
- // Returns:
- // - OK on success
- // - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
-
- // Flushes contents of all storages to underlying files.
- //
- // Returns:
- // - OK on success
- // - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
-
- // Computes and returns Info checksum.
- //
- // Returns:
- // - Crc of the Info on success
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
-
- // Computes and returns all storages checksum.
- //
- // Returns:
- // - Crc of all storages on success
- // - INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
- bool force) override;
-
- Crcs& crcs() override {
- return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
- kCrcsMetadataBufferOffset);
- }
-
- const Crcs& crcs() const override {
- return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
- kCrcsMetadataBufferOffset);
- }
-
- Info& info() {
- return *reinterpret_cast<Info*>(metadata_buffer_.get() +
- kInfoMetadataBufferOffset);
- }
-
- const Info& info() const {
- return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
- kInfoMetadataBufferOffset);
- }
-
- void SetInfoDirty() { is_info_dirty_ = true; }
- // When storage is dirty, we have to set info dirty as well. So just expose
- // SetDirty to set both.
- void SetDirty() {
- is_info_dirty_ = true;
- is_storage_dirty_ = true;
- }
-
- bool is_info_dirty() const { return is_info_dirty_; }
- bool is_storage_dirty() const { return is_storage_dirty_; }
-
- // Metadata buffer
- std::unique_ptr<uint8_t[]> metadata_buffer_;
-
- // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
- // JoinablePropertyId) to another referenced document's qualified id string
- // index in qualified_id_storage_.
- std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+ kWorkingPathType) {}
- // Storage for qualified id strings.
- std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
+ virtual libtextclassifier3::Status PersistStoragesToDisk(
+ bool force) override = 0;
- // TODO(b/268521214): add delete propagation storage
+ virtual libtextclassifier3::Status PersistMetadataToDisk(
+ bool force) override = 0;
- // Flag indicating whether memory map max possible file size for underlying
- // FileBackedVector before growing the actual file size.
- bool pre_mapping_fbv_;
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) override = 0;
- // Flag indicating whether use persistent hash map as the key mapper (if
- // false, then fall back to dynamic trie key mapper).
- bool use_persistent_hash_map_;
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override = 0;
- bool is_info_dirty_;
- bool is_storage_dirty_;
+ virtual Crcs& crcs() override = 0;
+ virtual const Crcs& crcs() const override = 0;
};
} // namespace lib
diff --git a/icing/join/qualified-id-join-indexing-handler-v1_test.cc b/icing/join/qualified-id-join-indexing-handler-v1_test.cc
new file mode 100644
index 0000000..9700132
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler-v1_test.cc
@@ -0,0 +1,558 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/join/qualified-id.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Schema type for referenced documents: ReferencedType
+static constexpr std::string_view kReferencedType = "ReferencedType";
+static constexpr std::string_view kPropertyName = "name";
+
+// Joinable properties and joinable property id. Joinable property id is
+// determined by the lexicographical order of joinable property path.
+// Schema type with joinable property: FakeType
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
+
+static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
+
+// Schema type with nested joinable properties: NestedType
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
+
+static constexpr DocumentId kDefaultDocumentId = 3;
+
+// TODO(b/275121148): remove this test after deprecating
+// QualifiedIdJoinIndexImplV1.
+class QualifiedIdJoinIndexingHandlerV1Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdJoinIndexImplV1::Create(
+ filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kReferencedType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ doc_store_.reset();
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+};
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ /*clock=*/nullptr, doc_store_.get(), qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, /*doc_store=*/nullptr, qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, doc_store_.get(), /*qualified_id_join_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleJoinableProperty) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleNestedJoinableProperty) {
+ DocumentProto referenced_document1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ DocumentProto referenced_document2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/2")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "two")
+ .Build();
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/2")
+ .Build())
+ .AddStringProperty(std::string(kPropertyQualifiedId2),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ nested_document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle nested_document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/2"));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleShouldSkipInvalidFormatQualifiedId) {
+ static constexpr std::string_view kInvalidFormatQualifiedId =
+ "invalid_format_qualified_id";
+ ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ std::string(kInvalidFormatQualifiedId))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document. Should ignore invalid format qualified id.
+ // Index data should remain unchanged since there is no valid qualified id,
+ // but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleShouldSkipEmptyQualifiedId) {
+ // Create a document without any qualified id.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document. Index data should remain unchanged since there is no
+ // qualified id, but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handling document with kInvalidDocumentId should cause a failure, and both
+ // index data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Recovery mode should get the same result.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handle document with document_id == last_added_document_id in recovery
+ // mode. We should not get any error, but the handler should ignore the
+ // document, so both index data and last_added_document_id should remain
+ // unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId + 1,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId + 1));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-indexing-handler.cc b/icing/join/qualified-id-join-indexing-handler.cc
index 344cf41..df86cba 100644
--- a/icing/join/qualified-id-join-indexing-handler.cc
+++ b/icing/join/qualified-id-join-indexing-handler.cc
@@ -14,8 +14,13 @@
#include "icing/join/qualified-id-join-indexing-handler.h"
+#include <cstdint>
+#include <limits>
#include <memory>
+#include <optional>
#include <string_view>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -26,7 +31,11 @@
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/logging.pb.h"
#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
#include "icing/util/clock.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -38,12 +47,15 @@ namespace lib {
/* static */ libtextclassifier3::StatusOr<
std::unique_ptr<QualifiedIdJoinIndexingHandler>>
QualifiedIdJoinIndexingHandler::Create(
- const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index) {
+ const Clock* clock, const DocumentStore* doc_store,
+ QualifiedIdJoinIndex* qualified_id_join_index) {
ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(doc_store);
ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
- new QualifiedIdJoinIndexingHandler(clock, qualified_id_join_index));
+ new QualifiedIdJoinIndexingHandler(clock, doc_store,
+ qualified_id_join_index));
}
libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
@@ -69,30 +81,89 @@ libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
}
qualified_id_join_index_.set_last_added_document_id(document_id);
- for (const JoinableProperty<std::string_view>& qualified_id_property :
- tokenized_document.qualified_id_join_properties()) {
- if (qualified_id_property.values.empty()) {
- continue;
+ if (qualified_id_join_index_.is_v2()) {
+ // v2
+ std::optional<DocumentFilterData> filter_data =
+ doc_store_.GetAliveDocumentFilterData(
+ document_id,
+ /*current_time_ms=*/std::numeric_limits<int64_t>::min());
+ if (!filter_data) {
+ // This should not happen.
+ return absl_ports::InternalError(
+ "Failed to get alive document filter data when indexing");
}
- DocJoinInfo info(document_id, qualified_id_property.metadata.id);
- // Currently we only support single (non-repeated) joinable value under a
- // property.
- std::string_view ref_qualified_id_str = qualified_id_property.values[0];
-
- // Attempt to parse qualified id string to make sure the format is correct.
- if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
- // Skip incorrect format of qualified id string to save disk space.
- continue;
+ for (const JoinableProperty<std::string_view>& qualified_id_property :
+ tokenized_document.qualified_id_join_properties()) {
+ // Parse all qualified id strings and convert them to
+ // NamespaceFingerprintIdentifier.
+ std::vector<NamespaceFingerprintIdentifier> ref_doc_ns_fingerprint_ids;
+ for (std::string_view ref_qualified_id_str :
+ qualified_id_property.values) {
+ // Attempt to parse qualified id string to make sure the format is
+ // correct.
+ auto ref_qualified_id_or = QualifiedId::Parse(ref_qualified_id_str);
+ if (!ref_qualified_id_or.ok()) {
+ // Skip incorrect format of qualified id string.
+ continue;
+ }
+
+ QualifiedId ref_qualified_id =
+ std::move(ref_qualified_id_or).ValueOrDie();
+ auto ref_namespace_id_or =
+ doc_store_.GetNamespaceId(ref_qualified_id.name_space());
+ if (!ref_namespace_id_or.ok()) {
+ // Skip invalid namespace id.
+ continue;
+ }
+ NamespaceId ref_namespace_id =
+ std::move(ref_namespace_id_or).ValueOrDie();
+
+ ref_doc_ns_fingerprint_ids.push_back(NamespaceFingerprintIdentifier(
+ ref_namespace_id, ref_qualified_id.uri()));
+ }
+
+ // Batch add all join data of this (schema_type_id, joinable_property_id)
+ // into to the index.
+ libtextclassifier3::Status status = qualified_id_join_index_.Put(
+ filter_data->schema_type_id(), qualified_id_property.metadata.id,
+ document_id, std::move(ref_doc_ns_fingerprint_ids));
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to add data into qualified id join index v2 due to: "
+ << status.error_message();
+ return status;
+ }
}
-
- libtextclassifier3::Status status =
- qualified_id_join_index_.Put(info, ref_qualified_id_str);
- if (!status.ok()) {
- ICING_LOG(WARNING)
- << "Failed to add data into qualified id join index due to: "
- << status.error_message();
- return status;
+ } else {
+ // v1
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ for (const JoinableProperty<std::string_view>& qualified_id_property :
+ tokenized_document.qualified_id_join_properties()) {
+ if (qualified_id_property.values.empty()) {
+ continue;
+ }
+
+ DocJoinInfo info(document_id, qualified_id_property.metadata.id);
+ // Currently we only support single (non-repeated) joinable value under a
+ // property.
+ std::string_view ref_qualified_id_str = qualified_id_property.values[0];
+
+ // Attempt to parse qualified id string to make sure the format is
+ // correct.
+ if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
+ // Skip incorrect format of qualified id string to save disk space.
+ continue;
+ }
+
+ libtextclassifier3::Status status =
+ qualified_id_join_index_.Put(info, ref_qualified_id_str);
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to add data into qualified id join index due to: "
+ << status.error_message();
+ return status;
+ }
}
}
diff --git a/icing/join/qualified-id-join-indexing-handler.h b/icing/join/qualified-id-join-indexing-handler.h
index f44e45d..8a11bf9 100644
--- a/icing/join/qualified-id-join-indexing-handler.h
+++ b/icing/join/qualified-id-join-indexing-handler.h
@@ -15,11 +15,15 @@
#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
+#include <memory>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/data-indexing-handler.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/util/clock.h"
#include "icing/util/tokenized-document.h"
@@ -37,7 +41,8 @@ class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
// - FAILED_PRECONDITION_ERROR if any of the input pointer is null
static libtextclassifier3::StatusOr<
std::unique_ptr<QualifiedIdJoinIndexingHandler>>
- Create(const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index);
+ Create(const Clock* clock, const DocumentStore* doc_store,
+ QualifiedIdJoinIndex* qualified_id_join_index);
~QualifiedIdJoinIndexingHandler() override = default;
@@ -57,10 +62,13 @@ class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
private:
explicit QualifiedIdJoinIndexingHandler(
- const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index)
+ const Clock* clock, const DocumentStore* doc_store,
+ QualifiedIdJoinIndex* qualified_id_join_index)
: DataIndexingHandler(clock),
+ doc_store_(*doc_store),
qualified_id_join_index_(*qualified_id_join_index) {}
+ const DocumentStore& doc_store_; // Does not own.
QualifiedIdJoinIndex& qualified_id_join_index_; // Does not own.
};
diff --git a/icing/join/qualified-id-join-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc
index 7e89dfa..53d35c7 100644
--- a/icing/join/qualified-id-join-indexing-handler_test.cc
+++ b/icing/join/qualified-id-join-indexing-handler_test.cc
@@ -17,12 +17,19 @@
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id.h"
#include "icing/portable/platform.h"
@@ -31,7 +38,11 @@
#include "icing/schema-builder.h"
#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -39,6 +50,7 @@
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
#include "unicode/uloc.h"
@@ -47,9 +59,11 @@ namespace lib {
namespace {
+using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::IsTrue;
+using ::testing::NotNull;
// Schema type for referenced documents: ReferencedType
static constexpr std::string_view kReferencedType = "ReferencedType";
@@ -61,18 +75,11 @@ static constexpr std::string_view kPropertyName = "name";
static constexpr std::string_view kFakeType = "FakeType";
static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
-static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
-
// Schema type with nested joinable properties: NestedType
static constexpr std::string_view kNestedType = "NestedType";
static constexpr std::string_view kPropertyNestedDoc = "nested";
static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
-static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
-static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
-
-static constexpr DocumentId kDefaultDocumentId = 3;
-
class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
protected:
void SetUp() override {
@@ -89,12 +96,12 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
- ICING_ASSERT_OK_AND_ASSIGN(
- qualified_id_join_index_,
- QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdJoinIndexImplV2::Create(
+ filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -140,9 +147,52 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
ICING_ASSERT_OK(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false));
+
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/true,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
+
+ // Get FakeType related ids.
+ ICING_ASSERT_OK_AND_ASSIGN(fake_type_id_,
+ schema_store_->GetSchemaTypeId(kFakeType));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ const JoinablePropertyMetadata* metadata1,
+ schema_store_->GetJoinablePropertyMetadata(
+ fake_type_id_, std::string(kPropertyQualifiedId)));
+ ASSERT_THAT(metadata1, NotNull());
+ fake_type_joinable_property_id_ = metadata1->id;
+
+ // Get NestedType related ids.
+ ICING_ASSERT_OK_AND_ASSIGN(nested_type_id_,
+ schema_store_->GetSchemaTypeId(kNestedType));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ const JoinablePropertyMetadata* metadata2,
+ schema_store_->GetJoinablePropertyMetadata(
+ nested_type_id_,
+ absl_ports::StrCat(kPropertyNestedDoc, ".", kPropertyQualifiedId)));
+ ASSERT_THAT(metadata2, NotNull());
+ nested_type_nested_joinable_property_id_ = metadata2->id;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ const JoinablePropertyMetadata* metadata3,
+ schema_store_->GetJoinablePropertyMetadata(
+ nested_type_id_, std::string(kPropertyQualifiedId2)));
+ ASSERT_THAT(metadata3, NotNull());
+ nested_type_joinable_property_id_ = metadata3->id;
}
void TearDown() override {
+ doc_store_.reset();
schema_store_.reset();
lang_segmenter_.reset();
qualified_id_join_index_.reset();
@@ -155,30 +205,77 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
std::string base_dir_;
std::string qualified_id_join_index_dir_;
std::string schema_store_dir_;
+ std::string doc_store_dir_;
- std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> qualified_id_join_index_;
std::unique_ptr<LanguageSegmenter> lang_segmenter_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+
+ // FakeType related ids.
+ SchemaTypeId fake_type_id_;
+ JoinablePropertyId fake_type_joinable_property_id_;
+
+ // NestedType related ids.
+ SchemaTypeId nested_type_id_;
+ JoinablePropertyId nested_type_nested_joinable_property_id_;
+ JoinablePropertyId nested_type_joinable_property_id_;
};
+libtextclassifier3::StatusOr<
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
+GetJoinData(const QualifiedIdJoinIndexImplV2& index,
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
+ index.GetIterator(schema_type_id, joinable_property_id));
+
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->GetCurrent());
+ }
+
+ return result;
+}
+
TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
- /*clock=*/nullptr, qualified_id_join_index_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ /*clock=*/nullptr, doc_store_.get(), qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
- &fake_clock_, /*qualified_id_join_index=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, /*doc_store=*/nullptr, qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, doc_store_.get(), /*qualified_id_join_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
DocumentProto referenced_document =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
.SetSchema(std::string(kReferencedType))
.AddStringProperty(std::string(kPropertyName), "one")
.Build();
-
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -186,44 +283,81 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
.AddStringProperty(std::string(kPropertyQualifiedId),
"pkg$db/ns#ref_type/1")
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
- document));
+ std::move(document)));
+ // Handle document.
ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
Eq(kInvalidDocumentId));
- // Handle document.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId,
- /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
IsOk());
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- IsOkAndHolds("pkg$db/ns#ref_type/1"));
+ // Verify the state of qualified_id_join_index_ after Handle().
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain
+ // [(doc_id, ref_doc_ns_fingerprint_id)].
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id))));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
+ // Create and put referenced (parent) document1. Get its document id and
+ // namespace id.
DocumentProto referenced_document1 =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
.SetSchema(std::string(kReferencedType))
.AddStringProperty(std::string(kPropertyName), "one")
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id1,
+ doc_store_->Put(referenced_document1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id1,
+ doc_store_->GetNamespaceId(referenced_document1.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id1(
+ /*namespace_id=*/ref_doc_ns_id1,
+ /*target_str=*/referenced_document1.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id1),
+ IsOkAndHolds(ref_doc_id1));
+
+ // Create and put referenced (parent) document2. Get its document id and
+ // namespace id.
DocumentProto referenced_document2 =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/2")
.SetSchema(std::string(kReferencedType))
.AddStringProperty(std::string(kPropertyName), "two")
.Build();
-
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id2,
+ doc_store_->Put(referenced_document2));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id2,
+ doc_store_->GetNamespaceId(referenced_document2.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id2(
+ /*namespace_id=*/ref_doc_ns_id2,
+ /*target_str=*/referenced_document2.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id2),
+ IsOkAndHolds(ref_doc_id2));
+
+ // Create and put (child) document:
+ // - kPropertyNestedDoc.kPropertyQualifiedId refers to referenced_document2.
+ // - kPropertyQualifiedId2 refers to referenced_document1.
+ //
+ // Also tokenize it.
DocumentProto nested_document =
DocumentBuilder()
.SetKey("pkg$db/ns", "nested_type/1")
@@ -239,31 +373,51 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
.AddStringProperty(std::string(kPropertyQualifiedId2),
"pkg$db/ns#ref_type/1")
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
+ doc_store_->Put(nested_document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
nested_document));
+ // Handle nested_document.
ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
Eq(kInvalidDocumentId));
- // Handle nested_document.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
- EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
- /*recovery_mode=*/false,
- /*put_document_stats=*/nullptr),
- IsOk());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
- IsOkAndHolds("pkg$db/ns#ref_type/2"));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
- IsOkAndHolds("pkg$db/ns#ref_type/1"));
+ // Verify the state of qualified_id_join_index_ after Handle().
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+ // (kNestedType, kPropertyNestedDoc.kPropertyQualifiedId) should contain
+ // [(doc_id, ref_doc_ns_fingerprint_id2)].
+ EXPECT_THAT(
+ GetJoinData(
+ *qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
+ /*joinable_property_id=*/nested_type_nested_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id2))));
+ // (kNestedType, kPropertyQualifiedId2) should contain
+ // [(doc_id, ref_doc_ns_fingerprint_id1)].
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
+ /*joinable_property_id=*/nested_type_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id1))));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest,
@@ -273,6 +427,8 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Create and put (child) document with an invalid format referenced qualified
+ // id. Also tokenize it.
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -280,71 +436,133 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
.AddStringProperty(std::string(kPropertyQualifiedId),
std::string(kInvalidFormatQualifiedId))
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
document));
+ // Handle document. Should ignore invalid format qualified id.
ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
Eq(kInvalidDocumentId));
- // Handle document. Should ignore invalid format qualified id.
- // Index data should remain unchanged since there is no valid qualified id,
- // but last_added_document_id should be updated.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId,
- /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
IsOk());
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Verify the state of qualified_id_join_index_ after Handle(). Index data
+ // should remain unchanged since there is no valid qualified id, but
+ // last_added_document_id should be updated.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleShouldSkipNonExistingNamespace) {
+ static constexpr std::string_view kUnknownNamespace = "UnknownNamespace";
+ // Create and put (child) document which references to a parent qualified id
+ // with an unknown namespace. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(
+ std::string(kPropertyQualifiedId),
+ absl_ports::StrCat(kUnknownNamespace, "#", "ref_type/1"))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ // Handle document.
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Verify the state of qualified_id_join_index_ after Handle().
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should be empty since
+ // "UnknownNamespace#ref_type/1" should be skipped.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
- // Create a document without any qualified id.
+ // Create and put (child) document without any qualified id. Also tokenize it.
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
document));
ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+ // Handle document.
ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
Eq(kInvalidDocumentId));
- // Handle document. Index data should remain unchanged since there is no
- // qualified id, but last_added_document_id should be updated.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId,
- /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
IsOk());
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Verify the state of qualified_id_join_index_ after Handle(). Index data
+ // should remain unchanged since there is no qualified id, but
+ // last_added_document_id should be updated.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest,
HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
DocumentProto referenced_document =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
.SetSchema(std::string(kReferencedType))
.AddStringProperty(std::string(kPropertyName), "one")
.Build();
-
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -352,31 +570,35 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
.AddStringProperty(std::string(kPropertyQualifiedId),
"pkg$db/ns#ref_type/1")
.Build();
+ ICING_ASSERT_OK(doc_store_->Put(document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
- document));
+ std::move(document)));
- qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ qualified_id_join_index_->set_last_added_document_id(ref_doc_id);
ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
+ Eq(ref_doc_id));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
- // Handling document with kInvalidDocumentId should cause a failure, and both
- // index data and last_added_document_id should remain unchanged.
+ // Handling document with kInvalidDocumentId should cause a failure.
EXPECT_THAT(
handler->Handle(tokenized_document, kInvalidDocumentId,
/*recovery_mode=*/false, /*put_document_stats=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify the state of qualified_id_join_index_ after Handle(). Both index
+ // data and last_added_document_id should remain unchanged.
EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ Eq(ref_doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
// Recovery mode should get the same result.
EXPECT_THAT(
@@ -384,21 +606,35 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
/*recovery_mode=*/false, /*put_document_stats=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ Eq(ref_doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest,
HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
DocumentProto referenced_document =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
.SetSchema(std::string(kReferencedType))
.AddStringProperty(std::string(kPropertyName), "one")
.Build();
-
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -406,57 +642,75 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
.AddStringProperty(std::string(kPropertyQualifiedId),
"pkg$db/ns#ref_type/1")
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
- document));
-
- qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
- ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
+ std::move(document)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
- // Handling document with document_id < last_added_document_id should cause a
- // failure, and both index data and last_added_document_id should remain
- // unchanged.
- ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure.
+ qualified_id_join_index_->set_last_added_document_id(doc_id);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId - 1,
- /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // Verify the state of qualified_id_join_index_ after Handle(). Both index
+ // data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
- // Handling document with document_id == last_added_document_id should cause a
- // failure, and both index data and last_added_document_id should remain
- // unchanged.
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure.
+ qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id + 1));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId,
- /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify the state of qualified_id_join_index_ after Handle(). Both index
+ // data and last_added_document_id should remain unchanged.
EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ Eq(doc_id + 1));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
}
TEST_F(QualifiedIdJoinIndexingHandlerTest,
- HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+ HandleRecoveryModeShouldIndexDocsGtLastAddedDocId) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
DocumentProto referenced_document =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
.SetSchema(std::string(kReferencedType))
.AddStringProperty(std::string(kPropertyName), "one")
.Build();
-
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -464,60 +718,109 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
.AddStringProperty(std::string(kPropertyQualifiedId),
"pkg$db/ns#ref_type/1")
.Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
- document));
-
- qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
- ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
+ std::move(document)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
- QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
qualified_id_join_index_.get()));
- // Handle document with document_id < last_added_document_id in recovery mode.
- // We should not get any error, but the handler should ignore the document, so
- // both index data and last_added_document_id should remain unchanged.
- ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ qualified_id_join_index_->set_last_added_document_id(doc_id - 1);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id - 1));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId - 1,
- /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
IsOk());
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id))));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleRecoveryModeShouldIgnoreDocsLeLastAddedDocId) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
// Handle document with document_id == last_added_document_id in recovery
// mode. We should not get any error, but the handler should ignore the
// document, so both index data and last_added_document_id should remain
// unchanged.
+ qualified_id_join_index_->set_last_added_document_id(doc_id);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId,
- /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
IsOk());
- EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
- // Handle document with document_id > last_added_document_id in recovery mode.
- // The handler should index this document and update last_added_document_id.
- ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue());
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id + 1));
EXPECT_THAT(
- handler->Handle(tokenized_document, kDefaultDocumentId + 1,
- /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
IsOk());
EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
- Eq(kDefaultDocumentId + 1));
- EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
- kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)),
- IsOkAndHolds("pkg$db/ns#ref_type/1"));
+ Eq(doc_id + 1));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
}
} // namespace
diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc
index dd63784..ec7e277 100644
--- a/icing/legacy/index/icing-dynamic-trie_test.cc
+++ b/icing/legacy/index/icing-dynamic-trie_test.cc
@@ -716,7 +716,7 @@ TEST_F(IcingDynamicTrieTest, Properties) {
static const uint32_t kOne = 1;
uint32_t val_idx;
- trie.Insert("abcd", &kOne, &val_idx, false);
+ ICING_ASSERT_OK(trie.Insert("abcd", &kOne, &val_idx, false));
trie.SetProperty(val_idx, 0);
trie.SetProperty(val_idx, 3);
@@ -736,7 +736,7 @@ TEST_F(IcingDynamicTrieTest, Properties) {
}
// Persist after sync.
- trie.Insert("abcd", &kOne, &val_idx, false);
+ ICING_ASSERT_OK(trie.Insert("abcd", &kOne, &val_idx, false));
trie.SetProperty(val_idx, 1);
ASSERT_TRUE(trie.Sync());
trie.Close();
@@ -770,15 +770,15 @@ TEST_F(IcingDynamicTrieTest, ClearSingleProperty) {
static const uint32_t kOne = 1;
uint32_t val_idx[3];
- trie.Insert("abcd", &kOne, &val_idx[0], false);
+ ICING_ASSERT_OK(trie.Insert("abcd", &kOne, &val_idx[0], false));
trie.SetProperty(val_idx[0], 0);
trie.SetProperty(val_idx[0], 3);
- trie.Insert("efgh", &kOne, &val_idx[1], false);
+ ICING_ASSERT_OK(trie.Insert("efgh", &kOne, &val_idx[1], false));
trie.SetProperty(val_idx[1], 0);
trie.SetProperty(val_idx[1], 3);
- trie.Insert("ijkl", &kOne, &val_idx[2], false);
+ ICING_ASSERT_OK(trie.Insert("ijkl", &kOne, &val_idx[2], false));
trie.SetProperty(val_idx[2], 0);
trie.SetProperty(val_idx[2], 3);
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
index 558da1c..76e41ce 100644
--- a/icing/monkey_test/icing-monkey-test-runner.cc
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -15,16 +15,33 @@
#include "icing/monkey_test/icing-monkey-test-runner.h"
#include <algorithm>
+#include <array>
#include <cstdint>
#include <functional>
+#include <memory>
+#include <random>
#include <string>
+#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/icing-search-engine.h"
#include "icing/monkey_test/in-memory-icing-search-engine.h"
#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/result-state-manager.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/logging.h"
@@ -37,17 +54,10 @@ namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::Eq;
using ::testing::Le;
+using ::testing::Not;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAreArray;
-SchemaProto GenerateRandomSchema(
- const IcingMonkeyTestRunnerConfiguration& config,
- MonkeyTestRandomEngine* random) {
- MonkeySchemaGenerator schema_generator(random);
- return schema_generator.GenerateSchema(config.num_types,
- config.possible_num_properties);
-}
-
SearchSpecProto GenerateRandomSearchSpecProto(
MonkeyTestRandomEngine* random,
MonkeyDocumentGenerator* document_generator) {
@@ -164,20 +174,13 @@ void SortDocuments(std::vector<DocumentProto>& documents) {
} // namespace
IcingMonkeyTestRunner::IcingMonkeyTestRunner(
- const IcingMonkeyTestRunnerConfiguration& config)
- : config_(config), random_(config.seed), in_memory_icing_() {
+ IcingMonkeyTestRunnerConfiguration config)
+ : config_(std::move(config)),
+ random_(config_.seed),
+ in_memory_icing_(std::make_unique<InMemoryIcingSearchEngine>(&random_)),
+ schema_generator_(
+ std::make_unique<MonkeySchemaGenerator>(&random_, &config_)) {
ICING_LOG(INFO) << "Monkey test runner started with seed: " << config_.seed;
-
- SchemaProto schema = GenerateRandomSchema(config_, &random_);
- ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString();
-
- in_memory_icing_ =
- std::make_unique<InMemoryIcingSearchEngine>(&random_, std::move(schema));
-
- document_generator_ = std::make_unique<MonkeyDocumentGenerator>(
- &random_, in_memory_icing_->GetSchema(), config_.possible_num_tokens_,
- config_.num_namespaces, config_.num_uris);
-
std::string dir = GetTestTempDir() + "/icing/monkey";
filesystem_.DeleteDirectoryRecursively(dir.c_str());
icing_dir_ = std::make_unique<DestructibleDirectory>(&filesystem_, dir);
@@ -186,7 +189,7 @@ IcingMonkeyTestRunner::IcingMonkeyTestRunner(
void IcingMonkeyTestRunner::Run(uint32_t num) {
ASSERT_TRUE(icing_ != nullptr)
<< "Icing search engine has not yet been created. Please call "
- "CreateIcingSearchEngineWithSchema() first";
+ "Initialize() first";
uint32_t frequency_sum = 0;
for (const auto& schedule : config_.monkey_api_schedules) {
@@ -208,10 +211,55 @@ void IcingMonkeyTestRunner::Run(uint32_t num) {
}
}
-void IcingMonkeyTestRunner::CreateIcingSearchEngineWithSchema() {
+SetSchemaResultProto IcingMonkeyTestRunner::SetSchema(SchemaProto&& schema) {
+ in_memory_icing_->SetSchema(std::move(schema));
+ document_generator_ = std::make_unique<MonkeyDocumentGenerator>(
+ &random_, in_memory_icing_->GetSchema(), &config_);
+ return icing_->SetSchema(*in_memory_icing_->GetSchema(),
+ /*ignore_errors_and_delete_documents=*/true);
+}
+
+void IcingMonkeyTestRunner::Initialize() {
ASSERT_NO_FATAL_FAILURE(CreateIcingSearchEngine());
- ASSERT_THAT(icing_->SetSchema(*in_memory_icing_->GetSchema()).status(),
- ProtoIsOk());
+
+ SchemaProto schema = schema_generator_->GenerateSchema();
+ ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString();
+
+ ASSERT_THAT(SetSchema(std::move(schema)).status(), ProtoIsOk());
+}
+
+void IcingMonkeyTestRunner::DoUpdateSchema() {
+ ICING_LOG(INFO) << "Monkey updating schema";
+
+ MonkeySchemaGenerator::UpdateSchemaResult result =
+ schema_generator_->UpdateSchema(*in_memory_icing_->GetSchema());
+ if (result.is_invalid_schema) {
+ SetSchemaResultProto set_schema_result =
+ icing_->SetSchema(result.schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ ASSERT_THAT(set_schema_result.status(), Not(ProtoIsOk()));
+ return;
+ }
+ ICING_LOG(DBG) << "Updating schema to: " << result.schema.DebugString();
+ SetSchemaResultProto icing_set_schema_result =
+ SetSchema(std::move(result.schema));
+ ASSERT_THAT(icing_set_schema_result.status(), ProtoIsOk());
+ ASSERT_THAT(icing_set_schema_result.deleted_schema_types(),
+ UnorderedElementsAreArray(result.schema_types_deleted));
+ ASSERT_THAT(icing_set_schema_result.incompatible_schema_types(),
+ UnorderedElementsAreArray(result.schema_types_incompatible));
+ ASSERT_THAT(
+ icing_set_schema_result.index_incompatible_changed_schema_types(),
+ UnorderedElementsAreArray(result.schema_types_index_incompatible));
+
+ // Update in-memory icing
+ for (const std::string& deleted_type : result.schema_types_deleted) {
+ ICING_ASSERT_OK(in_memory_icing_->DeleteBySchemaType(deleted_type));
+ }
+ for (const std::string& incompatible_type :
+ result.schema_types_incompatible) {
+ ICING_ASSERT_OK(in_memory_icing_->DeleteBySchemaType(incompatible_type));
+ }
}
void IcingMonkeyTestRunner::DoGet() {
@@ -266,10 +314,11 @@ void IcingMonkeyTestRunner::DoDelete() {
/*p_other=*/0.1);
ICING_LOG(INFO) << "Monkey deleting namespace: " << document.name_space
<< ", uri: " << document.uri;
- in_memory_icing_->Delete(document.name_space, document.uri);
DeleteResultProto delete_result =
icing_->Delete(document.name_space, document.uri);
if (document.document.has_value()) {
+ ICING_ASSERT_OK(
+ in_memory_icing_->Delete(document.name_space, document.uri));
ASSERT_THAT(delete_result.status(), ProtoIsOk())
<< "Cannot delete an existing document.";
} else {
@@ -383,8 +432,8 @@ void IcingMonkeyTestRunner::DoSearch() {
ICING_VLOG(1) << "scoring_spec:\n" << scoring_spec->DebugString();
ICING_VLOG(1) << "result_spec:\n" << result_spec->DebugString();
- std::vector<DocumentProto> exp_documents =
- in_memory_icing_->Search(*search_spec);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentProto> exp_documents,
+ in_memory_icing_->Search(*search_spec));
SearchResultProto search_result =
icing_->Search(*search_spec, *scoring_spec, *result_spec);
diff --git a/icing/monkey_test/icing-monkey-test-runner.h b/icing/monkey_test/icing-monkey-test-runner.h
index fbaaaaa..10be60c 100644
--- a/icing/monkey_test/icing-monkey-test-runner.h
+++ b/icing/monkey_test/icing-monkey-test-runner.h
@@ -16,63 +16,36 @@
#define ICING_MONKEY_TEST_ICING_MONKEY_TEST_RUNNER_H_
#include <cstdint>
-#include <random>
+#include <memory>
#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
#include "icing/monkey_test/in-memory-icing-search-engine.h"
#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/proto/schema.pb.h"
namespace icing {
namespace lib {
-class IcingMonkeyTestRunner;
-
-struct IcingMonkeyTestRunnerConfiguration {
- explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types,
- int num_namespaces, int num_uris,
- int index_merge_size)
- : seed(seed),
- num_types(num_types),
- num_namespaces(num_namespaces),
- num_uris(num_uris),
- index_merge_size(index_merge_size) {}
-
- uint32_t seed;
- int num_types;
- int num_namespaces;
- int num_uris;
- int index_merge_size;
-
- // The possible number of properties that may appear in generated schema
- // types.
- std::vector<int> possible_num_properties;
-
- // The possible number of tokens that may appear in generated documents, with
- // a noise factor from 0.5 to 1 applied.
- std::vector<int> possible_num_tokens_;
-
- // An array of pairs of monkey test APIs with frequencies.
- // If f_sum is the sum of all the frequencies, an operation with frequency f
- // means for every f_sum iterations, the operation is expected to run f times.
- std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
- monkey_api_schedules;
-};
-
class IcingMonkeyTestRunner {
public:
- IcingMonkeyTestRunner(const IcingMonkeyTestRunnerConfiguration& config);
+ IcingMonkeyTestRunner(IcingMonkeyTestRunnerConfiguration config);
IcingMonkeyTestRunner(const IcingMonkeyTestRunner&) = delete;
IcingMonkeyTestRunner& operator=(const IcingMonkeyTestRunner&) = delete;
+ SetSchemaResultProto SetSchema(SchemaProto&& schema);
+
// This function must and should only be called before running the monkey
// test.
- void CreateIcingSearchEngineWithSchema();
+ void Initialize();
// Run the monkey test with num operations.
void Run(uint32_t num);
// APIs supported in icing search engine.
+ void DoUpdateSchema();
void DoGet();
void DoGetAllNamespaces();
void DoPut();
@@ -94,6 +67,7 @@ class IcingMonkeyTestRunner {
std::unique_ptr<InMemoryIcingSearchEngine> in_memory_icing_;
std::unique_ptr<IcingSearchEngine> icing_;
+ std::unique_ptr<MonkeySchemaGenerator> schema_generator_;
std::unique_ptr<MonkeyDocumentGenerator> document_generator_;
void CreateIcingSearchEngine();
diff --git a/icing/monkey_test/icing-search-engine_monkey_test.cc b/icing/monkey_test/icing-search-engine_monkey_test.cc
index a24e57f..436e27b 100644
--- a/icing/monkey_test/icing-search-engine_monkey_test.cc
+++ b/icing/monkey_test/icing-search-engine_monkey_test.cc
@@ -12,9 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <cstdint>
+#include <random>
+#include <utility>
+
#include "gtest/gtest.h"
#include "icing/monkey_test/icing-monkey-test-runner.h"
+#include "icing/monkey_test/monkey-test-util.h"
#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/schema/section.h"
+#include "icing/util/logging.h"
namespace icing {
namespace lib {
@@ -44,13 +52,14 @@ TEST(IcingSearchEngineMonkeyTest, MonkeyTest) {
{&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
{&IcingMonkeyTestRunner::DoDelete, 50},
{&IcingMonkeyTestRunner::DoDeleteByNamespace, 50},
- {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 50},
+ {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 45},
{&IcingMonkeyTestRunner::DoDeleteByQuery, 20},
{&IcingMonkeyTestRunner::DoOptimize, 5},
+ {&IcingMonkeyTestRunner::DoUpdateSchema, 5},
{&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
uint32_t num_iterations = IsAndroidArm() ? 1000 : 5000;
- IcingMonkeyTestRunner runner(config);
- ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema());
+ IcingMonkeyTestRunner runner(std::move(config));
+ ASSERT_NO_FATAL_FAILURE(runner.Initialize());
ASSERT_NO_FATAL_FAILURE(runner.Run(num_iterations));
}
@@ -75,8 +84,8 @@ TEST(DISABLED_IcingSearchEngineMonkeyTest, MonkeyManyDocTest) {
{&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
{&IcingMonkeyTestRunner::DoOptimize, 5},
{&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
- IcingMonkeyTestRunner runner(config);
- ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema());
+ IcingMonkeyTestRunner runner(std::move(config));
+ ASSERT_NO_FATAL_FAILURE(runner.Initialize());
// Pre-fill with 4 million documents
SetLoggingLevel(LogSeverity::WARNING);
for (int i = 0; i < 4000000; i++) {
diff --git a/icing/monkey_test/in-memory-icing-search-engine.cc b/icing/monkey_test/in-memory-icing-search-engine.cc
index 405a7b0..7baa06e 100644
--- a/icing/monkey_test/in-memory-icing-search-engine.cc
+++ b/icing/monkey_test/in-memory-icing-search-engine.cc
@@ -14,15 +14,27 @@
#include "icing/monkey_test/in-memory-icing-search-engine.h"
+#include <algorithm>
#include <cstdint>
+#include <memory>
+#include <random>
+#include <string>
#include <string_view>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/document-id.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -38,9 +50,80 @@ bool IsPrefix(std::string_view s1, std::string_view s2) {
return s1 == s2.substr(0, s1.length());
}
-bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
- const std::string &query,
- TermMatchType::Code term_match_type) {
+} // namespace
+
+libtextclassifier3::StatusOr<const PropertyConfigProto *>
+InMemoryIcingSearchEngine::GetPropertyConfig(
+ const std::string &schema_type, const std::string &property_name) const {
+ auto schema_iter = property_config_map_.find(schema_type);
+ if (schema_iter == property_config_map_.end()) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Schema type: ", schema_type, " is not found."));
+ }
+ auto property_iter = schema_iter->second.find(property_name);
+ if (property_iter == schema_iter->second.end()) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Property: ", property_name, " is not found."));
+ }
+ return &property_iter->second;
+}
+
+libtextclassifier3::StatusOr<TermMatchType::Code>
+InMemoryIcingSearchEngine::GetTermMatchType(
+ const std::string &schema_type,
+ const MonkeyTokenizedSection &section) const {
+ bool in_indexable_properties_list = false;
+ bool all_indexable_from_top = true;
+
+ std::vector<std::string_view> properties_in_path =
+ absl_ports::StrSplit(section.path, ".");
+ if (properties_in_path.empty()) {
+ return absl_ports::InvalidArgumentError("Got empty path.");
+ }
+ std::string curr_schema_type = schema_type;
+ for (int i = 0; i < properties_in_path.size(); ++i) {
+ ICING_ASSIGN_OR_RETURN(
+ const PropertyConfigProto *prop,
+ GetPropertyConfig(curr_schema_type,
+ std::string(properties_in_path[i])));
+ if (prop->data_type() == PropertyConfigProto::DataType::STRING) {
+ return prop->string_indexing_config().term_match_type();
+ }
+
+ if (prop->data_type() != PropertyConfigProto::DataType::DOCUMENT) {
+ return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+ }
+
+ bool old_all_indexable_from_top = all_indexable_from_top;
+ all_indexable_from_top &=
+ prop->document_indexing_config().index_nested_properties();
+ if (!all_indexable_from_top && !in_indexable_properties_list) {
+ // Only try to update in_indexable_properties_list if this is the first
+ // level with index_nested_properties=false.
+ if (old_all_indexable_from_top) {
+ auto &indexable_properties =
+ prop->document_indexing_config().indexable_nested_properties_list();
+ std::string relative_path =
+ absl_ports::StrCatPieces(std::vector<std::string_view>(
+ properties_in_path.begin() + i + 1, properties_in_path.end()));
+ in_indexable_properties_list =
+ std::find(indexable_properties.begin(), indexable_properties.end(),
+ relative_path) != indexable_properties.end();
+ }
+ // Check in_indexable_properties_list again.
+ if (!in_indexable_properties_list) {
+ return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+ }
+ }
+ curr_schema_type = prop->document_indexing_config().GetTypeName();
+ }
+ return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+}
+
+libtextclassifier3::StatusOr<bool>
+InMemoryIcingSearchEngine::DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const {
std::vector<std::string_view> strs = absl_ports::StrSplit(query, ":");
std::string_view query_term;
std::string_view section_restrict;
@@ -54,8 +137,15 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
if (!section_restrict.empty() && section.path != section_restrict) {
continue;
}
+ ICING_ASSIGN_OR_RETURN(
+ TermMatchType::Code section_term_match_type,
+ GetTermMatchType(document.document.schema(), section));
+ if (section_term_match_type == TermMatchType::UNKNOWN) {
+ // Skip non-indexable property.
+ continue;
+ }
for (const std::string &token : section.token_sequence) {
- if (section.term_match_type == TermMatchType::EXACT_ONLY ||
+ if (section_term_match_type == TermMatchType::EXACT_ONLY ||
term_match_type == TermMatchType::EXACT_ONLY) {
if (token == query_term) {
return true;
@@ -68,7 +158,18 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
return false;
}
-} // namespace
+void InMemoryIcingSearchEngine::SetSchema(SchemaProto &&schema) {
+ schema_ = std::make_unique<SchemaProto>(std::move(schema));
+ property_config_map_.clear();
+ for (const SchemaTypeConfigProto &type_config : schema_->types()) {
+ auto &curr_property_map = property_config_map_[type_config.schema_type()];
+ for (const PropertyConfigProto &property_config :
+ type_config.properties()) {
+ curr_property_map.insert(
+ {property_config.property_name(), property_config});
+ }
+ }
+}
InMemoryIcingSearchEngine::PickDocumentResult
InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
@@ -121,7 +222,7 @@ InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
void InMemoryIcingSearchEngine::Put(const MonkeyTokenizedDocument &document) {
// Delete the old one if existing.
- Delete(document.document.namespace_(), document.document.uri());
+ Delete(document.document.namespace_(), document.document.uri()).IgnoreError();
existing_doc_ids_.push_back(documents_.size());
namespace_uri_docid_map[document.document.namespace_()]
[document.document.uri()] = documents_.size();
@@ -192,7 +293,8 @@ InMemoryIcingSearchEngine::DeleteBySchemaType(const std::string &schema_type) {
libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
const SearchSpecProto &search_spec) {
- std::vector<DocumentId> doc_ids_to_delete = InternalSearch(search_spec);
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> doc_ids_to_delete,
+ InternalSearch(search_spec));
for (DocumentId doc_id : doc_ids_to_delete) {
const DocumentProto &document = documents_[doc_id].document;
if (!Delete(document.namespace_(), document.uri()).ok()) {
@@ -204,9 +306,10 @@ libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
return doc_ids_to_delete.size();
}
-std::vector<DocumentProto> InMemoryIcingSearchEngine::Search(
- const SearchSpecProto &search_spec) const {
- std::vector<DocumentId> matched_doc_ids = InternalSearch(search_spec);
+libtextclassifier3::StatusOr<std::vector<DocumentProto>>
+InMemoryIcingSearchEngine::Search(const SearchSpecProto &search_spec) const {
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> matched_doc_ids,
+ InternalSearch(search_spec));
std::vector<DocumentProto> result;
result.reserve(matched_doc_ids.size());
for (DocumentId doc_id : matched_doc_ids) {
@@ -229,12 +332,16 @@ libtextclassifier3::StatusOr<DocumentId> InMemoryIcingSearchEngine::InternalGet(
" is not found by InMemoryIcingSearchEngine::InternalGet."));
}
-std::vector<DocumentId> InMemoryIcingSearchEngine::InternalSearch(
+libtextclassifier3::StatusOr<std::vector<DocumentId>>
+InMemoryIcingSearchEngine::InternalSearch(
const SearchSpecProto &search_spec) const {
std::vector<DocumentId> matched_doc_ids;
for (DocumentId doc_id : existing_doc_ids_) {
- if (DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
- search_spec.term_match_type())) {
+ ICING_ASSIGN_OR_RETURN(
+ bool match,
+ DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
+ search_spec.term_match_type()));
+ if (match) {
matched_doc_ids.push_back(doc_id);
}
}
diff --git a/icing/monkey_test/in-memory-icing-search-engine.h b/icing/monkey_test/in-memory-icing-search-engine.h
index a5d8872..98e7e4c 100644
--- a/icing/monkey_test/in-memory-icing-search-engine.h
+++ b/icing/monkey_test/in-memory-icing-search-engine.h
@@ -16,18 +16,21 @@
#define ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
#include <cstdint>
+#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
#include "icing/monkey_test/monkey-tokenized-document.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/store/document-id.h"
namespace icing {
@@ -43,15 +46,14 @@ class InMemoryIcingSearchEngine {
std::optional<DocumentProto> document;
};
- InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random,
- SchemaProto &&schema)
- : random_(random),
- schema_(std::make_unique<SchemaProto>(std::move(schema))) {}
+ InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random) : random_(random) {}
uint32_t GetNumAliveDocuments() const { return existing_doc_ids_.size(); }
const SchemaProto *GetSchema() const { return schema_.get(); }
+ void SetSchema(SchemaProto &&schema);
+
// Randomly pick a document from the in-memory Icing for monkey testing.
//
// p_alive: chance of getting an alive document.
@@ -112,7 +114,8 @@ class InMemoryIcingSearchEngine {
// Currently, only the "query" and "term_match_type" fields are recognized by
// the in-memory Icing, and only single term queries with possible section
// restrictions are supported.
- std::vector<DocumentProto> Search(const SearchSpecProto &search_spec) const;
+ libtextclassifier3::StatusOr<std::vector<DocumentProto>> Search(
+ const SearchSpecProto &search_spec) const;
private:
// Does not own.
@@ -126,6 +129,11 @@ class InMemoryIcingSearchEngine {
namespace_uri_docid_map;
std::unique_ptr<SchemaProto> schema_;
+ // A map that maps from (schema_type, property_name) to the corresponding
+ // PropertyConfigProto.
+ std::unordered_map<
+ std::string, std::unordered_map<std::string, const PropertyConfigProto &>>
+ property_config_map_;
// Finds and returns the internal document id for the document identified by
// the given key (namespace, uri)
@@ -138,8 +146,19 @@ class InMemoryIcingSearchEngine {
// A helper method for DeleteByQuery and Search to get matched internal doc
// ids.
- std::vector<DocumentId> InternalSearch(
+ libtextclassifier3::StatusOr<std::vector<DocumentId>> InternalSearch(
const SearchSpecProto &search_spec) const;
+
+ libtextclassifier3::StatusOr<const PropertyConfigProto *> GetPropertyConfig(
+ const std::string &schema_type, const std::string &property_name) const;
+
+ libtextclassifier3::StatusOr<TermMatchType::Code> GetTermMatchType(
+ const std::string &schema_type,
+ const MonkeyTokenizedSection &section) const;
+
+ libtextclassifier3::StatusOr<bool> DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const;
};
} // namespace lib
diff --git a/icing/monkey_test/monkey-test-generators.cc b/icing/monkey_test/monkey-test-generators.cc
index 7b2ff56..0d5ad73 100644
--- a/icing/monkey_test/monkey-test-generators.cc
+++ b/icing/monkey_test/monkey-test-generators.cc
@@ -14,79 +14,269 @@
#include "icing/monkey_test/monkey-test-generators.h"
+#include <array>
+#include <cstdint>
+#include <random>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/document-builder.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+
namespace icing {
namespace lib {
-SchemaProto MonkeySchemaGenerator::GenerateSchema(
- int num_types, const std::vector<int>& possible_num_properties) const {
+namespace {
+
+constexpr std::array<PropertyConfigProto::Cardinality::Code, 3> kCardinalities =
+ {PropertyConfigProto::Cardinality::REPEATED,
+ PropertyConfigProto::Cardinality::OPTIONAL,
+ PropertyConfigProto::Cardinality::REQUIRED};
+
+constexpr std::array<TermMatchType::Code, 3> kTermMatchTypes = {
+ TermMatchType::UNKNOWN, TermMatchType::EXACT_ONLY, TermMatchType::PREFIX};
+
+PropertyConfigProto::Cardinality::Code GetRandomCardinality(
+ MonkeyTestRandomEngine* random) {
+ std::uniform_int_distribution<> dist(0, kCardinalities.size() - 1);
+ return kCardinalities[dist(*random)];
+}
+
+TermMatchType::Code GetRandomTermMatchType(MonkeyTestRandomEngine* random) {
+ std::uniform_int_distribution<> dist(0, kTermMatchTypes.size() - 1);
+ return kTermMatchTypes[dist(*random)];
+}
+
+// TODO: Update this function when supporting document_indexing_config.
+bool IsIndexableProperty(const PropertyConfigProto& property) {
+ return property.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN;
+}
+
+void SetStringIndexingConfig(PropertyConfigProto& property,
+ TermMatchType::Code term_match_type) {
+ if (term_match_type != TermMatchType::UNKNOWN) {
+ StringIndexingConfig* string_indexing_config =
+ property.mutable_string_indexing_config();
+ string_indexing_config->set_term_match_type(term_match_type);
+ // TODO: Try to add different TokenizerTypes. VERBATIM, RFC822, and URL are
+ // the remaining candidates to consider.
+ string_indexing_config->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ } else {
+ property.clear_string_indexing_config();
+ }
+}
+
+} // namespace
+
+SchemaProto MonkeySchemaGenerator::GenerateSchema() {
SchemaProto schema;
- std::uniform_int_distribution<> dist(0, possible_num_properties.size() - 1);
- while (num_types > 0) {
- int num_properties = possible_num_properties[dist(*random_)];
- *schema.add_types() = GenerateType(
- "MonkeyTestType" + std::to_string(num_types), num_properties);
- --num_types;
+ for (int i = 0; i < config_->num_types; ++i) {
+ *schema.add_types() = GenerateType();
}
return schema;
}
+MonkeySchemaGenerator::UpdateSchemaResult MonkeySchemaGenerator::UpdateSchema(
+ const SchemaProto& schema) {
+ UpdateSchemaResult result = {std::move(schema)};
+ SchemaProto& new_schema = result.schema;
+
+ // Delete up to 2 existing types.
+ std::uniform_int_distribution<> num_types_to_delete_dist(0, 2);
+ for (int num_types_to_delete = num_types_to_delete_dist(*random_);
+ num_types_to_delete >= 0; --num_types_to_delete) {
+ if (new_schema.types_size() > 0) {
+ std::uniform_int_distribution<> dist(0, new_schema.types_size() - 1);
+ int index_to_delete = dist(*random_);
+ result.schema_types_deleted.insert(
+ new_schema.types(index_to_delete).schema_type());
+ new_schema.mutable_types()->SwapElements(index_to_delete,
+ new_schema.types_size() - 1);
+ new_schema.mutable_types()->RemoveLast();
+ }
+ }
+
+ // Updating about 1/3 of existing types.
+ for (int i = 0; i < new_schema.types_size(); ++i) {
+ std::uniform_int_distribution<> dist(0, 2);
+ if (dist(*random_) == 0) {
+ UpdateType(*new_schema.mutable_types(i), result);
+ }
+ }
+
+ // Add up to 2 new types.
+ std::uniform_int_distribution<> num_types_to_add_dist(0, 2);
+ for (int num_types_to_add = num_types_to_add_dist(*random_);
+ num_types_to_add >= 0; --num_types_to_add) {
+ *new_schema.add_types() = GenerateType();
+ }
+
+ return result;
+}
+
PropertyConfigProto MonkeySchemaGenerator::GenerateProperty(
- std::string_view name, TermMatchType::Code term_match_type) const {
+ const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type) {
PropertyConfigProto prop;
- prop.set_property_name(name.data(), name.length());
+ prop.set_property_name(
+ "MonkeyTestProp" +
+ std::to_string(num_properties_generated_[type_config.schema_type()]++));
// TODO: Perhaps in future iterations we will want to generate more than just
// string properties.
prop.set_data_type(PropertyConfigProto::DataType::STRING);
+ prop.set_cardinality(cardinality);
+ SetStringIndexingConfig(prop, term_match_type);
+ return prop;
+}
- constexpr std::array<PropertyConfigProto::Cardinality::Code, 3>
- cardinalities = {PropertyConfigProto::Cardinality::REPEATED,
- PropertyConfigProto::Cardinality::OPTIONAL,
- PropertyConfigProto::Cardinality::REQUIRED};
- std::uniform_int_distribution<> dist(0, cardinalities.size() - 1);
- prop.set_cardinality(cardinalities[dist(*random_)]);
+void MonkeySchemaGenerator::UpdateProperty(
+ const SchemaTypeConfigProto& type_config, PropertyConfigProto& property,
+ UpdateSchemaResult& result) {
+ PropertyConfigProto::Cardinality::Code new_cardinality =
+ GetRandomCardinality(random_);
+ if (new_cardinality != property.cardinality()) {
+ // Only do compatible cardinality update for now, otherwise it would be hard
+ // to track which documents will be invalid after updating the schema.
+ //
+ // The following type of updates are not allowed:
+ // - optional -> required
+ // - repeated -> optional
+ // - repeated -> required
+ if (property.cardinality() == PropertyConfigProto::Cardinality::OPTIONAL &&
+ new_cardinality == PropertyConfigProto::Cardinality::REQUIRED) {
+ return;
+ }
+ if (property.cardinality() == PropertyConfigProto::Cardinality::REPEATED &&
+ (new_cardinality == PropertyConfigProto::Cardinality::OPTIONAL ||
+ new_cardinality == PropertyConfigProto::Cardinality::REQUIRED)) {
+ return;
+ }
+ property.set_cardinality(new_cardinality);
+ }
- if (term_match_type != TermMatchType::UNKNOWN) {
- StringIndexingConfig* string_indexing_config =
- prop.mutable_string_indexing_config();
- string_indexing_config->set_term_match_type(term_match_type);
- string_indexing_config->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ if (property.data_type() == PropertyConfigProto::DataType::STRING) {
+ TermMatchType::Code new_term_match_type = GetRandomTermMatchType(random_);
+ if (new_term_match_type !=
+ property.string_indexing_config().term_match_type()) {
+ SetStringIndexingConfig(property, new_term_match_type);
+ result.schema_types_index_incompatible.insert(type_config.schema_type());
+ }
}
- return prop;
}
-SchemaTypeConfigProto MonkeySchemaGenerator::GenerateType(
- std::string_view name, int num_properties) const {
+SchemaTypeConfigProto MonkeySchemaGenerator::GenerateType() {
SchemaTypeConfigProto type_config;
- type_config.set_schema_type(name.data(), name.length());
+ type_config.set_schema_type("MonkeyTestType" +
+ std::to_string(num_types_generated_++));
+ std::uniform_int_distribution<> possible_num_properties_dist(
+ 0, config_->possible_num_properties.size() - 1);
+ int total_num_properties =
+ config_->possible_num_properties[possible_num_properties_dist(*random_)];
+
int num_indexed_properties = 0;
- constexpr std::array<TermMatchType::Code, 3> term_match_types = {
- TermMatchType::UNKNOWN, TermMatchType::EXACT_ONLY, TermMatchType::PREFIX};
- std::uniform_int_distribution<> dist(0, term_match_types.size() - 1);
- while (--num_properties >= 0) {
- std::string prop_name = "MonkeyTestProp" + std::to_string(num_properties);
+ for (int i = 0; i < total_num_properties; ++i) {
TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
if (num_indexed_properties < kTotalNumSections) {
- term_match_type = term_match_types[dist(*random_)];
+ term_match_type = GetRandomTermMatchType(random_);
}
if (term_match_type != TermMatchType::UNKNOWN) {
num_indexed_properties += 1;
}
- (*type_config.add_properties()) =
- GenerateProperty(prop_name, term_match_type);
+ (*type_config.add_properties()) = GenerateProperty(
+ type_config, GetRandomCardinality(random_), term_match_type);
}
return type_config;
}
+void MonkeySchemaGenerator::UpdateType(SchemaTypeConfigProto& type_config,
+ UpdateSchemaResult& result) {
+  // Delete up to 5 existing properties (the loop runs the drawn count + 1
+  // times).
+ std::uniform_int_distribution<> num_properties_to_delete_dist(0, 4);
+ for (int num_properties_to_delete = num_properties_to_delete_dist(*random_);
+ num_properties_to_delete >= 0; --num_properties_to_delete) {
+ if (type_config.properties_size() > 0) {
+ std::uniform_int_distribution<> dist(0,
+ type_config.properties_size() - 1);
+ int index_to_delete = dist(*random_);
+ // Only delete a required property for now, otherwise it would be hard
+ // to track which documents will be invalid after updating the schema.
+ if (type_config.properties(index_to_delete).cardinality() !=
+ PropertyConfigProto::Cardinality::REQUIRED) {
+ continue;
+ }
+ if (IsIndexableProperty(type_config.properties(index_to_delete))) {
+ result.schema_types_index_incompatible.insert(
+ type_config.schema_type());
+ }
+ // Removing a property will cause the type to be considered as
+ // incompatible.
+ result.schema_types_incompatible.insert(type_config.schema_type());
+
+ type_config.mutable_properties()->SwapElements(
+ index_to_delete, type_config.properties_size() - 1);
+ type_config.mutable_properties()->RemoveLast();
+ }
+ }
+
+ // Updating about 1/3 of existing properties.
+ for (int i = 0; i < type_config.properties_size(); ++i) {
+ std::uniform_int_distribution<> dist(0, 2);
+ if (dist(*random_) == 0) {
+ UpdateProperty(type_config, *type_config.mutable_properties(i), result);
+ }
+ }
+
+  // Add 1 to 5 new properties (the loop runs the drawn count + 1 times).
+ std::uniform_int_distribution<> num_types_to_add_dist(0, 4);
+ for (int num_types_to_add = num_types_to_add_dist(*random_);
+ num_types_to_add >= 0; --num_types_to_add) {
+ PropertyConfigProto::Cardinality::Code new_cardinality =
+ GetRandomCardinality(random_);
+    // Adding a required property will make all documents of this type invalid.
+ if (new_cardinality == PropertyConfigProto::Cardinality::REQUIRED) {
+ result.schema_types_incompatible.insert(type_config.schema_type());
+ }
+ PropertyConfigProto new_property = GenerateProperty(
+ type_config, new_cardinality, GetRandomTermMatchType(random_));
+ if (IsIndexableProperty(new_property)) {
+ result.schema_types_index_incompatible.insert(type_config.schema_type());
+ }
+ (*type_config.add_properties()) = std::move(new_property);
+ }
+
+ int num_indexed_properties = 0;
+ for (int i = 0; i < type_config.properties_size(); ++i) {
+ if (IsIndexableProperty(type_config.properties(i))) {
+ ++num_indexed_properties;
+ }
+ }
+
+ if (num_indexed_properties > kTotalNumSections) {
+ result.is_invalid_schema = true;
+ }
+}
+
std::string MonkeyDocumentGenerator::GetNamespace() const {
uint32_t name_space;
// When num_namespaces is 0, all documents generated get different namespaces.
// Otherwise, namespaces will be randomly picked from a set with
// num_namespaces elements.
- if (num_namespaces_ == 0) {
+ if (config_->num_namespaces == 0) {
name_space = num_docs_generated_;
} else {
- std::uniform_int_distribution<> dist(0, num_namespaces_ - 1);
+ std::uniform_int_distribution<> dist(0, config_->num_namespaces - 1);
name_space = dist(*random_);
}
return absl_ports::StrCat("namespace", std::to_string(name_space));
@@ -96,18 +286,19 @@ std::string MonkeyDocumentGenerator::GetUri() const {
uint32_t uri;
// When num_uris is 0, all documents generated get different URIs. Otherwise,
// URIs will be randomly picked from a set with num_uris elements.
- if (num_uris_ == 0) {
+ if (config_->num_uris == 0) {
uri = num_docs_generated_;
} else {
- std::uniform_int_distribution<> dist(0, num_uris_ - 1);
+ std::uniform_int_distribution<> dist(0, config_->num_uris - 1);
uri = dist(*random_);
}
return absl_ports::StrCat("uri", std::to_string(uri));
}
int MonkeyDocumentGenerator::GetNumTokens() const {
- std::uniform_int_distribution<> dist(0, possible_num_tokens_.size() - 1);
- int n = possible_num_tokens_[dist(*random_)];
+ std::uniform_int_distribution<> dist(
+ 0, config_->possible_num_tokens_.size() - 1);
+ int n = config_->possible_num_tokens_[dist(*random_)];
// Add some noise
std::uniform_real_distribution<> real_dist(0.5, 1);
float p = real_dist(*random_);
@@ -138,15 +329,13 @@ MonkeyTokenizedDocument MonkeyDocumentGenerator::GenerateDocument() {
std::vector<std::string> prop_content = GetPropertyContent();
doc_builder.AddStringProperty(prop.property_name(),
absl_ports::StrJoin(prop_content, " "));
- // Create a tokenized section if the current property is indexable.
- if (prop.data_type() == PropertyConfigProto::DataType::STRING &&
- prop.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- MonkeyTokenizedSection section = {
- prop.property_name(), prop.string_indexing_config().term_match_type(),
- std::move(prop_content)};
- document.tokenized_sections.push_back(std::move(section));
- }
+ // No matter whether the property is indexable currently, we have to create
+ // a section for it since a non-indexable property can become indexable
+ // after a schema type change. The in-memory icing will automatically skip
+ // sections that are non-indexable at the time of search requests.
+ MonkeyTokenizedSection section = {prop.property_name(),
+ std::move(prop_content)};
+ document.tokenized_sections.push_back(std::move(section));
}
document.document = doc_builder.Build();
++num_docs_generated_;
diff --git a/icing/monkey_test/monkey-test-generators.h b/icing/monkey_test/monkey-test-generators.h
index 6349918..72a4723 100644
--- a/icing/monkey_test/monkey-test-generators.h
+++ b/icing/monkey_test/monkey-test-generators.h
@@ -15,51 +15,66 @@
#ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
#define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
-#include <algorithm>
#include <cstdint>
#include <random>
#include <string>
#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
#include <vector>
-#include "icing/absl_ports/str_cat.h"
-#include "icing/absl_ports/str_join.h"
-#include "icing/document-builder.h"
#include "icing/monkey_test/monkey-test-common-words.h"
+#include "icing/monkey_test/monkey-test-util.h"
#include "icing/monkey_test/monkey-tokenized-document.h"
-#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
-#include "icing/schema/section.h"
+#include "icing/proto/term.pb.h"
#include "icing/util/clock.h"
namespace icing {
namespace lib {
-using MonkeyTestRandomEngine = std::mt19937;
-
// A random schema generator used for monkey testing.
class MonkeySchemaGenerator {
public:
- explicit MonkeySchemaGenerator(MonkeyTestRandomEngine* random)
- : random_(random) {}
+ struct UpdateSchemaResult {
+ SchemaProto schema;
+ bool is_invalid_schema;
+ std::unordered_set<std::string> schema_types_deleted;
+ std::unordered_set<std::string> schema_types_incompatible;
+ std::unordered_set<std::string> schema_types_index_incompatible;
+ };
+
+ explicit MonkeySchemaGenerator(
+ MonkeyTestRandomEngine* random,
+ const IcingMonkeyTestRunnerConfiguration* config)
+ : random_(random), config_(config) {}
- // To ensure that the random schema is generated with the best quality, the
- // number of properties for each type will only be randomly picked from the
- // list of possible_num_properties, instead of picking it from a range.
- // For example, a vector of [1, 2, 3, 4] means each generated types have a 25%
- // chance of getting 1 property, 2 properties, 3 properties and 4 properties.
- SchemaProto GenerateSchema(
- int num_types, const std::vector<int>& possible_num_properties) const;
+ SchemaProto GenerateSchema();
+
+ UpdateSchemaResult UpdateSchema(const SchemaProto& schema);
private:
PropertyConfigProto GenerateProperty(
- std::string_view name, TermMatchType::Code term_match_type) const;
+ const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type);
+
+ void UpdateProperty(const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto& property,
+ UpdateSchemaResult& result);
- SchemaTypeConfigProto GenerateType(std::string_view name,
- int num_properties) const;
+ SchemaTypeConfigProto GenerateType();
- // Does not own.
- MonkeyTestRandomEngine* random_;
+ void UpdateType(SchemaTypeConfigProto& type_config,
+ UpdateSchemaResult& result);
+
+ int num_types_generated_ = 0;
+ // A map from type name to the number of properties generated in the
+ // corresponding types.
+ std::unordered_map<std::string, int> num_properties_generated_;
+
+ MonkeyTestRandomEngine* random_; // Does not own.
+ const IcingMonkeyTestRunnerConfiguration* config_; // Does not own.
};
// A random document generator used for monkey testing.
@@ -68,16 +83,10 @@ class MonkeySchemaGenerator {
// Same for num_namespaces.
class MonkeyDocumentGenerator {
public:
- explicit MonkeyDocumentGenerator(MonkeyTestRandomEngine* random,
- const SchemaProto* schema,
- std::vector<int> possible_num_tokens,
- uint32_t num_namespaces,
- uint32_t num_uris = 0)
- : random_(random),
- schema_(schema),
- possible_num_tokens_(std::move(possible_num_tokens)),
- num_namespaces_(num_namespaces),
- num_uris_(num_uris) {}
+ explicit MonkeyDocumentGenerator(
+ MonkeyTestRandomEngine* random, const SchemaProto* schema,
+ const IcingMonkeyTestRunnerConfiguration* config)
+ : random_(random), schema_(schema), config_(config) {}
const SchemaTypeConfigProto& GetType() const {
std::uniform_int_distribution<> dist(0, schema_->types_size() - 1);
@@ -104,15 +113,10 @@ class MonkeyDocumentGenerator {
MonkeyTokenizedDocument GenerateDocument();
private:
- MonkeyTestRandomEngine* random_; // Does not own.
- const SchemaProto* schema_; // Does not own.
-
- // The possible number of tokens that may appear in generated documents, with
- // a noise factor from 0.5 to 1 applied.
- std::vector<int> possible_num_tokens_;
+ MonkeyTestRandomEngine* random_; // Does not own.
+ const SchemaProto* schema_; // Does not own.
+ const IcingMonkeyTestRunnerConfiguration* config_; // Does not own.
- uint32_t num_namespaces_;
- uint32_t num_uris_;
uint32_t num_docs_generated_ = 0;
Clock clock_;
};
diff --git a/icing/monkey_test/monkey-test-util.h b/icing/monkey_test/monkey-test-util.h
new file mode 100644
index 0000000..d6053d8
--- /dev/null
+++ b/icing/monkey_test/monkey-test-util.h
@@ -0,0 +1,68 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
+#define ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
+
+#include <cstdint>
+#include <functional>
+#include <random>
+#include <utility>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+using MonkeyTestRandomEngine = std::mt19937;
+
+class IcingMonkeyTestRunner;
+
+struct IcingMonkeyTestRunnerConfiguration {
+ explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types,
+ int num_namespaces, int num_uris,
+ int index_merge_size)
+ : seed(seed),
+ num_types(num_types),
+ num_namespaces(num_namespaces),
+ num_uris(num_uris),
+ index_merge_size(index_merge_size) {}
+
+ uint32_t seed;
+ int num_types;
+ int num_namespaces;
+ int num_uris;
+ int index_merge_size;
+
+ // To ensure that the random schema is generated with the best quality, the
+ // number of properties for each type will only be randomly picked from this
+  // list, instead of picking it from a range. For example, a vector of
+  // [1, 2, 3, 4] means each generated type has a 25% chance of getting 1
+  // property, 2 properties, 3 properties and 4 properties.
+ std::vector<int> possible_num_properties;
+
+ // The possible number of tokens that may appear in generated documents, with
+ // a noise factor from 0.5 to 1 applied.
+ std::vector<int> possible_num_tokens_;
+
+ // An array of pairs of monkey test APIs with frequencies.
+ // If f_sum is the sum of all the frequencies, an operation with frequency f
+ // means for every f_sum iterations, the operation is expected to run f times.
+ std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
+ monkey_api_schedules;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
diff --git a/icing/monkey_test/monkey-tokenized-document.h b/icing/monkey_test/monkey-tokenized-document.h
index a0b38c2..87b77bb 100644
--- a/icing/monkey_test/monkey-tokenized-document.h
+++ b/icing/monkey_test/monkey-tokenized-document.h
@@ -16,16 +16,15 @@
#define ICING_MONKEY_TEST_MONKEY_TOKENIZED_DOCUMENT_H_
#include <string>
+#include <vector>
#include "icing/proto/document.pb.h"
-#include "icing/proto/term.pb.h"
namespace icing {
namespace lib {
struct MonkeyTokenizedSection {
std::string path;
- TermMatchType::Code term_match_type;
std::vector<std::string> token_sequence;
};
diff --git a/icing/portable/platform.h b/icing/portable/platform.h
index 4c115e1..6d8c668 100644
--- a/icing/portable/platform.h
+++ b/icing/portable/platform.h
@@ -15,7 +15,10 @@
#ifndef ICING_PORTABLE_PLATFORM_H_
#define ICING_PORTABLE_PLATFORM_H_
-#include "unicode/uversion.h"
+#include "unicode/uconfig.h" // IWYU pragma: keep
+// clang-format: do not reorder the above include.
+
+#include "unicode/uvernum.h"
namespace icing {
namespace lib {
@@ -40,13 +43,8 @@ inline bool IsIcuTokenization() {
return !IsReverseJniTokenization() && !IsCfStringTokenization();
}
-inline bool IsIcu72PlusTokenization() {
- if (!IsIcuTokenization()) {
- return false;
- }
- UVersionInfo version_array;
- u_getVersion(version_array);
- return version_array[0] >= 72;
+inline int GetIcuTokenizationVersion() {
+ return IsIcuTokenization() ? U_ICU_VERSION_MAJOR_NUM : 0;
}
// Whether we're running on android_x86
diff --git a/icing/query/advanced_query_parser/parser.cc b/icing/query/advanced_query_parser/parser.cc
index fd74561..82576a1 100644
--- a/icing/query/advanced_query_parser/parser.cc
+++ b/icing/query/advanced_query_parser/parser.cc
@@ -116,7 +116,7 @@ Parser::ConsumeMember() {
// Member could be either `TEXT (DOT TEXT)* (DOT function)?` or `TEXT STAR`
// at this point. So check for 'STAR' to differentiate the two cases.
if (Match(Lexer::TokenType::STAR)) {
- Consume(Lexer::TokenType::STAR);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::STAR));
std::string_view raw_text = text_node->raw_value();
std::string text = std::move(*text_node).value();
text_node = std::make_unique<TextNode>(std::move(text), raw_text,
@@ -125,7 +125,7 @@ Parser::ConsumeMember() {
} else {
children.push_back(std::move(text_node));
while (Match(Lexer::TokenType::DOT)) {
- Consume(Lexer::TokenType::DOT);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::DOT));
if (MatchFunction()) {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<FunctionNode> function_node,
ConsumeFunction());
@@ -201,7 +201,7 @@ Parser::ConsumeArgs() {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> arg, ConsumeExpression());
args.push_back(std::move(arg));
while (Match(Lexer::TokenType::COMMA)) {
- Consume(Lexer::TokenType::COMMA);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::COMMA));
ICING_ASSIGN_OR_RETURN(arg, ConsumeExpression());
args.push_back(std::move(arg));
}
@@ -223,7 +223,7 @@ Parser::ConsumeRestriction() {
bool has_minus = Match(Lexer::TokenType::MINUS);
if (has_minus) {
- Consume(Lexer::TokenType::MINUS);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
}
std::unique_ptr<Node> arg;
@@ -276,10 +276,10 @@ libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeTerm() {
operator_text = "MINUS";
} else {
if (Match(Lexer::TokenType::NOT)) {
- Consume(Lexer::TokenType::NOT);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::NOT));
operator_text = "NOT";
} else {
- Consume(Lexer::TokenType::MINUS);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
operator_text = "MINUS";
}
}
@@ -296,7 +296,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeFactor() {
terms.push_back(std::move(term));
while (Match(Lexer::TokenType::OR)) {
- Consume(Lexer::TokenType::OR);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::OR));
ICING_ASSIGN_OR_RETURN(term, ConsumeTerm());
terms.push_back(std::move(term));
}
@@ -330,7 +330,7 @@ Parser::ConsumeQueryExpression() {
sequences.push_back(std::move(sequence));
while (Match(Lexer::TokenType::AND)) {
- Consume(Lexer::TokenType::AND);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::AND));
ICING_ASSIGN_OR_RETURN(sequence, ConsumeSequence());
sequences.push_back(std::move(sequence));
}
@@ -348,7 +348,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeMultExpr() {
while (Match(Lexer::TokenType::TIMES) || Match(Lexer::TokenType::DIV)) {
while (Match(Lexer::TokenType::TIMES)) {
- Consume(Lexer::TokenType::TIMES);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::TIMES));
ICING_ASSIGN_OR_RETURN(node, ConsumeTerm());
stack.push_back(std::move(node));
}
@@ -357,7 +357,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeMultExpr() {
stack.push_back(std::move(node));
while (Match(Lexer::TokenType::DIV)) {
- Consume(Lexer::TokenType::DIV);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::DIV));
ICING_ASSIGN_OR_RETURN(node, ConsumeTerm());
stack.push_back(std::move(node));
}
@@ -380,7 +380,7 @@ Parser::ConsumeScoringExpression() {
while (Match(Lexer::TokenType::PLUS) || Match(Lexer::TokenType::MINUS)) {
while (Match(Lexer::TokenType::PLUS)) {
- Consume(Lexer::TokenType::PLUS);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::PLUS));
ICING_ASSIGN_OR_RETURN(node, ConsumeMultExpr());
stack.push_back(std::move(node));
}
@@ -389,7 +389,7 @@ Parser::ConsumeScoringExpression() {
stack.push_back(std::move(node));
while (Match(Lexer::TokenType::MINUS)) {
- Consume(Lexer::TokenType::MINUS);
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
ICING_ASSIGN_OR_RETURN(node, ConsumeMultExpr());
stack.push_back(std::move(node));
}
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
index d75a550..31da959 100644
--- a/icing/query/advanced_query_parser/query-visitor.cc
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -33,9 +33,11 @@
#include "icing/index/iterator/doc-hit-info-iterator-none.h"
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-document.h"
#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/property-existence-indexing-handler.h"
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/param.h"
#include "icing/query/advanced_query_parser/parser.h"
@@ -222,13 +224,23 @@ void QueryVisitor::RegisterFunctions() {
auto property_defined = [this](std::vector<PendingValue>&& args) {
return this->PropertyDefinedFunction(std::move(args));
};
-
Function property_defined_function =
Function::Create(DataType::kDocumentIterator, "propertyDefined",
{Param(DataType::kString)}, std::move(property_defined))
.ValueOrDie();
registered_functions_.insert(
{property_defined_function.name(), std::move(property_defined_function)});
+
+ // DocHitInfoIterator hasProperty(std::string);
+ auto has_property = [this](std::vector<PendingValue>&& args) {
+ return this->HasPropertyFunction(std::move(args));
+ };
+ Function has_property_function =
+ Function::Create(DataType::kDocumentIterator, "hasProperty",
+ {Param(DataType::kString)}, std::move(has_property))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {has_property_function.name(), std::move(has_property_function)});
}
libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
@@ -279,7 +291,7 @@ libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
// Update members based on results of processing the query.
if (args.size() == 2 &&
pending_property_restricts_.has_active_property_restricts()) {
- iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ iterator = DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::move(iterator), &document_store_, &schema_store_,
pending_property_restricts_.active_property_restricts(),
current_time_ms_);
@@ -322,6 +334,31 @@ QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) {
return PendingValue(std::move(property_in_schema_iterator));
}
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::HasPropertyFunction(
+ std::vector<PendingValue>&& args) {
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const std::string& property_path = args.at(0).string_val().ValueOrDie()->term;
+
+ // Perform an exact search for the property existence metadata token.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator,
+ index_.GetIterator(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, property_path),
+ /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY,
+ /*need_hit_term_frequency=*/false));
+
+ std::unique_ptr<DocHitInfoIterator> property_in_document_iterator =
+ std::make_unique<DocHitInfoIteratorPropertyInDocument>(
+ std::move(meta_hit_iterator));
+
+ features_.insert(kHasPropertyFunctionFeature);
+
+ return PendingValue(std::move(property_in_document_iterator));
+}
+
libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
@@ -647,7 +684,7 @@ libtextclassifier3::Status QueryVisitor::ProcessHasOperator(
std::set<std::string> property_restricts = {std::move(text_value.term)};
pending_values_.push(
- PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ PendingValue(DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::move(delegate), &document_store_, &schema_store_,
std::move(property_restricts), current_time_ms_)));
return libtextclassifier3::Status::OK;
diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h
index 38864f8..d090b3c 100644
--- a/icing/query/advanced_query_parser/query-visitor.h
+++ b/icing/query/advanced_query_parser/query-visitor.h
@@ -247,13 +247,23 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
libtextclassifier3::StatusOr<PendingValue> SearchFunction(
std::vector<PendingValue>&& args);
- // Implementation of the propertyDefined(member) custom function.
+ // Implementation of the propertyDefined(property_path) custom function.
// Returns:
- // - a Pending Value holding a DocHitIterator to be implemented.
+ // - a Pending Value holding a DocHitIterator that returns hits for all
+ // documents whose schema types have defined the property specified by
+ // property_path.
// - any errors returned by Lexer::ExtractTokens
libtextclassifier3::StatusOr<PendingValue> PropertyDefinedFunction(
std::vector<PendingValue>&& args);
+ // Implementation of the hasProperty(property_path) custom function.
+ // Returns:
+ // - a Pending Value holding a DocHitIterator that returns hits for all
+ // documents that have the property specified by property_path.
+ // - any errors returned by Lexer::ExtractTokens
+ libtextclassifier3::StatusOr<PendingValue> HasPropertyFunction(
+ std::vector<PendingValue>&& args);
+
// Handles a NaryOperatorNode where the operator is HAS (':') and pushes an
// iterator with the proper section filter applied. If the current property
// restriction represented by pending_property_restricts and the first child
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index 59e924d..9455baa 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -17,17 +17,28 @@
#include <cstdint>
#include <limits>
#include <memory>
+#include <string>
#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/hit.h"
#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/dummy-numeric-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/property-existence-indexing-handler.h"
#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
@@ -35,7 +46,13 @@
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/query-features.h"
+#include "icing/query/query-results.h"
#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
@@ -47,6 +64,8 @@
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/status-macros.h"
#include "unicode/uloc.h"
namespace icing {
@@ -218,16 +237,16 @@ TEST_P(QueryVisitorTest, SimpleLessThan) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price < 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -261,16 +280,16 @@ TEST_P(QueryVisitorTest, SimpleLessThanEq) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price <= 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -304,16 +323,16 @@ TEST_P(QueryVisitorTest, SimpleEqual) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price == 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -347,16 +366,16 @@ TEST_P(QueryVisitorTest, SimpleGreaterThanEq) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price >= 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -390,16 +409,16 @@ TEST_P(QueryVisitorTest, SimpleGreaterThan) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price > 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -434,16 +453,16 @@ TEST_P(QueryVisitorTest, IntMinLessThanEqual) {
int64_t int_min = std::numeric_limits<int64_t>::min();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(int_min);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_min));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(std::numeric_limits<int64_t>::max());
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::max()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(int_min + 1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_min + 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price <= " + std::to_string(int_min));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -478,16 +497,16 @@ TEST_P(QueryVisitorTest, IntMaxGreaterThanEqual) {
int64_t int_max = std::numeric_limits<int64_t>::max();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(std::numeric_limits<int64_t>::min());
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::min()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(int_max);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_max));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(int_max - 1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_max - 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price >= " + std::to_string(int_max));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -521,18 +540,18 @@ TEST_P(QueryVisitorTest, NestedPropertyLessThan) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("subscription.price < 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -594,18 +613,18 @@ TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
// Create an invalid AST for the query '3 < subscription.price 25' where '<'
// has three operands
@@ -669,18 +688,18 @@ TEST_P(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
- editor->BufferKey(0);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
- editor->BufferKey(1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("time < 25");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -722,16 +741,16 @@ TEST_P(QueryVisitorTest, IntMinLessThanInvalid) {
int64_t int_min = std::numeric_limits<int64_t>::min();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(int_min);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_min));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(std::numeric_limits<int64_t>::max());
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::max()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(int_min + 1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_min + 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price <" + std::to_string(int_min));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -752,16 +771,16 @@ TEST_P(QueryVisitorTest, IntMaxGreaterThanInvalid) {
int64_t int_max = std::numeric_limits<int64_t>::max();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(std::numeric_limits<int64_t>::min());
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::min()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
- editor->BufferKey(int_max);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_max));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
- editor->BufferKey(int_max - 1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(int_max - 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
std::string query = CreateQuery("price >" + std::to_string(int_max));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -807,34 +826,34 @@ TEST_P(QueryVisitorTest, NumericComparatorDoesntAffectLaterTerms) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
- editor->BufferKey(-2);
- editor->BufferKey(-1);
- editor->BufferKey(1);
- editor->BufferKey(2);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(-2));
+ ICING_ASSERT_OK(editor->BufferKey(-1));
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
Index::Editor term_editor = index_->Edit(
kDocumentId0, kSectionId1, TERM_MATCH_PREFIX, /*namespace_id=*/0);
- term_editor.BufferTerm("-2");
- term_editor.BufferTerm("-1");
- term_editor.BufferTerm("1");
- term_editor.BufferTerm("2");
- term_editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(term_editor.BufferTerm("-2"));
+ ICING_ASSERT_OK(term_editor.BufferTerm("-1"));
+ ICING_ASSERT_OK(term_editor.BufferTerm("1"));
+ ICING_ASSERT_OK(term_editor.BufferTerm("2"));
+ ICING_ASSERT_OK(term_editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId0);
- editor->BufferKey(-1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(-1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId0);
- editor->BufferKey(-1);
- std::move(*editor).IndexAllBufferedKeys();
+ ICING_ASSERT_OK(editor->BufferKey(-1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
term_editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- term_editor.BufferTerm("2");
- term_editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(term_editor.BufferTerm("2"));
+ ICING_ASSERT_OK(term_editor.IndexAllBufferedTerms());
// Translating MINUS chars that are interpreted as NOTs, this query would be
// `price == -1 AND NOT 2`
@@ -872,18 +891,18 @@ TEST_P(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
// "bar" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -924,18 +943,18 @@ TEST_P(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
// "bar" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -975,18 +994,18 @@ TEST_P(QueryVisitorTest, SingleTermPrefix) {
// "bar" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// An EXACT query for 'fo' won't match anything.
std::string query = CreateQuery("fo");
@@ -1071,21 +1090,21 @@ TEST_P(QueryVisitorTest, SegmentationWithPrefix) {
// ["foo", "ba"] and ["bar", "fo"] respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("ba");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("ba"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("ba");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("ba"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.BufferTerm("fo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("fo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// An EXACT query for `ba?fo` will be lexed into a single TEXT token.
// The visitor will tokenize it into `ba` and `fo` (`?` is dropped because it
@@ -1138,18 +1157,18 @@ TEST_P(QueryVisitorTest, SingleVerbatimTerm) {
// "foo:bar(baz)" and "bar:baz(foo)" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo:bar(baz)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(baz)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo:bar(baz)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(baz)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar:baz(foo)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar:baz(foo)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("\"foo:bar(baz)\"");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -1184,18 +1203,18 @@ TEST_P(QueryVisitorTest, SingleVerbatimTermPrefix) {
// "foo:bar(abc)" and "bar:baz(foo)" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo:bar(baz)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(baz)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo:bar(abc)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(abc)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar:baz(foo)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar:baz(foo)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Query for `"foo:bar("*`. This should match docs 0 and 1.
std::string query = CreateQuery("\"foo:bar(\"*");
@@ -1236,18 +1255,18 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingQuote) {
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_EXACT, /*namespace_id=*/0);
- editor.BufferTerm(R"(foobary)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobary)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
- editor.BufferTerm(R"(foobar\)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
- editor.BufferTerm(R"(foobar")");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// From the comment above, verbatim_term = `foobar"` and verbatim_query =
// `foobar\"`
@@ -1288,19 +1307,19 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingEscape) {
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_EXACT, /*namespace_id=*/0);
- editor.BufferTerm(R"(foobary)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobary)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
// From the comment above, verbatim_term = `foobar\`.
- editor.BufferTerm(R"(foobar\)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
- editor.BufferTerm(R"(foobar")");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Issue a query for the verbatim token `foobar\`.
std::string query = CreateQuery(R"(("foobar\\"))");
@@ -1343,18 +1362,18 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_EXACT, /*namespace_id=*/0);
// From the comment above, verbatim_term = `foobary`.
- editor.BufferTerm(R"(foobary)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobary)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
- editor.BufferTerm(R"(foobar\)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
- editor.BufferTerm(R"(foobar\y)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\y)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Issue a query for the verbatim token `foobary`.
std::string query = CreateQuery(R"(("foobar\y"))");
@@ -1424,19 +1443,19 @@ TEST_P(QueryVisitorTest, VerbatimTermNewLine) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_EXACT, /*namespace_id=*/0);
// From the comment above, verbatim_term = `foobar` + '\n'.
- editor.BufferTerm("foobar\n");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foobar\n"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
- editor.BufferTerm(R"(foobar\)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
// verbatim_term = `foobar\n`. This is distinct from the term added above.
- editor.BufferTerm(R"(foobar\n)");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\n)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Issue a query for the verbatim token `foobar` + '\n'.
std::string query = CreateQuery("\"foobar\n\"");
@@ -1498,20 +1517,20 @@ TEST_P(QueryVisitorTest, VerbatimTermEscapingComplex) {
// `foo\\\"bar\\nbaz\"` and `foo\\"bar\\nbaz"` respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_EXACT, /*namespace_id=*/0);
- editor.BufferTerm(R"(foo\"bar\nbaz")");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foo\"bar\nbaz")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
// Add the verbatim_term from doc 0 but with all of the escapes left in
- editor.BufferTerm(R"(foo\\\"bar\\nbaz\")");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foo\\\"bar\\nbaz\")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
/*namespace_id=*/0);
// Add the verbatim_term from doc 0 but with the escapes for '\' chars left in
- editor.BufferTerm(R"(foo\\"bar\\nbaz")");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foo\\"bar\\nbaz")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Issue a query for the verbatim token `foo\"bar\nbaz"`.
std::string query = CreateQuery(R"(("foo\\\"bar\\nbaz\""))");
@@ -1556,22 +1575,22 @@ TEST_P(QueryVisitorTest, SingleMinusTerm) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("-foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -1610,22 +1629,22 @@ TEST_P(QueryVisitorTest, SingleNotTerm) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("NOT foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -1660,26 +1679,26 @@ TEST_P(QueryVisitorTest, NestedNotTerms) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Double negative could be rewritten as `(foo AND NOT bar) baz`
std::string query = CreateQuery("NOT (-foo OR bar) baz");
@@ -1718,26 +1737,26 @@ TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Simplifying:
// NOT (-(NOT (foo -bar) baz) -bat) NOT bass
@@ -1776,19 +1795,19 @@ TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
TEST_P(QueryVisitorTest, ImplicitAndTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -1819,19 +1838,19 @@ TEST_P(QueryVisitorTest, ImplicitAndTerms) {
TEST_P(QueryVisitorTest, ExplicitAndTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo AND bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -1862,19 +1881,19 @@ TEST_P(QueryVisitorTest, ExplicitAndTerms) {
TEST_P(QueryVisitorTest, OrTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("fo");
- editor.BufferTerm("ba");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("fo"));
+ ICING_ASSERT_OK(editor.BufferTerm("ba"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo OR bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -1905,20 +1924,20 @@ TEST_P(QueryVisitorTest, OrTerms) {
TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Should be interpreted like `foo (bar OR baz)`
std::string query = CreateQuery("foo bar OR baz");
@@ -2012,24 +2031,24 @@ TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("baz");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Should be interpreted like `foo ((NOT bar) OR baz)`
std::string query = CreateQuery("foo NOT bar OR baz");
@@ -2100,22 +2119,22 @@ TEST_P(QueryVisitorTest, PropertyFilter) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -2176,22 +2195,22 @@ TEST_F(QueryVisitorTest, MultiPropertyFilter) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop3_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = R"(search("foo", createList("prop1", "prop2")))";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -2275,22 +2294,22 @@ TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo", /*property_restrict=*/"PROP1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -2345,22 +2364,22 @@ TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query =
CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1");
@@ -2413,22 +2432,22 @@ TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("(prop1:foo)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -2500,22 +2519,22 @@ TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("(prop2:foo)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -2583,22 +2602,22 @@ TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Resulting queries:
// - kPlain: `-prop1:(foo OR bar)`
@@ -2667,30 +2686,43 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
+ // Create documents as follows:
+ // Doc0:
+ // prop1: "bar"
+ // prop2: ""
+ // Doc1:
+ // prop1: "foo"
+ // prop2: ""
+ // Doc2:
+ // prop1: ""
+ // prop2: "foo"
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Resulting queries:
// - kPlain: `prop1:(-foo OR bar)`
// - kSearch: `search("-foo OR bar", createList("prop1"))`
+ //
+ // The query is equivalent to `-prop1:foo OR prop1:bar`, thus doc0 and doc2
+ // will be matched.
std::string query =
CreateQuery("(-foo OR bar)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -2712,11 +2744,14 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
UnorderedElementsAre("bar"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
- ElementsAre(kDocumentId0));
+ ElementsAre(kDocumentId2, kDocumentId0));
// Resulting queries:
- // - kPlain: `prop1:(foo OR bar)`
- // - kSearch: `search("foo OR bar", createList("prop1"))`
+ // - kPlain: `prop1:(-foo OR bar)`
+ // - kSearch: `search("-foo OR bar", createList("prop1"))`
+ //
+ // The query is equivalent to `-prop1:foo OR prop1:bar`, thus doc0 and doc2
+ // will be matched.
query = CreateQuery("(NOT foo OR bar)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
@@ -2735,7 +2770,7 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
UnorderedElementsAre("bar"));
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
- ElementsAre(kDocumentId0));
+ ElementsAre(kDocumentId2, kDocumentId0));
}
TEST_P(QueryVisitorTest, SegmentationTest) {
@@ -2756,7 +2791,7 @@ TEST_P(QueryVisitorTest, SegmentationTest) {
.Build(),
/*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false));
-
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2768,36 +2803,36 @@ TEST_P(QueryVisitorTest, SegmentationTest) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("上班");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("上班"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(kDocumentId0, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
if (IsCfStringTokenization()) {
- editor.BufferTerm("每");
- editor.BufferTerm("天");
+ ICING_ASSERT_OK(editor.BufferTerm("每"));
+ ICING_ASSERT_OK(editor.BufferTerm("天"));
} else {
- editor.BufferTerm("每天");
+ ICING_ASSERT_OK(editor.BufferTerm("每天"));
}
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("上班");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("上班"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
if (IsCfStringTokenization()) {
- editor.BufferTerm("每");
- editor.BufferTerm("天");
+ ICING_ASSERT_OK(editor.BufferTerm("每"));
+ ICING_ASSERT_OK(editor.BufferTerm("天"));
} else {
- editor.BufferTerm("每天");
+ ICING_ASSERT_OK(editor.BufferTerm("每天"));
}
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
@@ -2862,55 +2897,55 @@ TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
Index::Editor editor =
index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.BufferTerm("val1");
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Should match.
doc = DocumentBuilder(doc).SetUri("uri1").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.BufferTerm("val1");
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Shouldn't match.
doc = DocumentBuilder(doc).SetUri("uri2").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.BufferTerm("val1");
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1' etc. Should match.
doc = DocumentBuilder(doc).SetUri("uri3").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val1");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
// Shouldn't match.
doc = DocumentBuilder(doc).SetUri("uri4").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val1");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Now issue a query with 'val1' restricted to 'prop1'. This should match only
// docs 1 and 3.
@@ -2977,55 +3012,55 @@ TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
Index::Editor editor =
index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.BufferTerm("val1");
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Shouldn't match.
doc = DocumentBuilder(doc).SetUri("uri1").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.BufferTerm("val1");
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Should match.
doc = DocumentBuilder(doc).SetUri("uri2").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.BufferTerm("val1");
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1' etc. Should match.
doc = DocumentBuilder(doc).SetUri("uri3").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val1");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
// Shouldn't match.
doc = DocumentBuilder(doc).SetUri("uri4").Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val1");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val2");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("val0");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Now issue a query with 'val1' restricted to 'prop1'. This should match only
// docs 1 and 3.
@@ -3197,23 +3232,23 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
editor = index_->Edit(kDocumentId2, prop1_section_id, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// *If* nested function calls were allowed, then this would simplify as:
// `search("search(\"foo\") bar")` -> `search("foo bar")` -> `foo bar`
@@ -3335,57 +3370,57 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
Index::Editor editor =
index_->Edit(kDocumentId0, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid1,
document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid2,
document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid3,
document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid4,
document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid5,
document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid6,
document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid7,
document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// *If* nested function calls were allowed, then this would simplify as:
// `search("search(\"foo\") bar")` -> `search("foo bar")` -> `foo bar`
@@ -3515,57 +3550,57 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
Index::Editor editor =
index_->Edit(kDocumentId0, prop0_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid1,
document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid2,
document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid3,
document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid4,
document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid5,
document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid6,
document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId docid7,
document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// *If* nested function calls were allowed, then this would simplify as:
// `search("search(\"foo\") bar")` -> `search("foo bar")` -> `foo bar`
@@ -3726,8 +3761,8 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionReturnsMatchingDocuments) {
DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Document 1 has the term "foo" and its schema DOESN'T have the url property.
ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
@@ -3736,16 +3771,16 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionReturnsMatchingDocuments) {
.Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Document 2 has the term "bar" and its schema has the url property.
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("typeWithUrl").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo propertyDefined(\"url\")");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -3786,8 +3821,8 @@ TEST_P(QueryVisitorTest,
DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Document 1 has the term "foo" and its schema DOESN'T have the url property.
ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
@@ -3796,8 +3831,8 @@ TEST_P(QueryVisitorTest,
.Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Attempt to query a non-existent property.
std::string query = CreateQuery("propertyDefined(\"nonexistentproperty\")");
@@ -3838,8 +3873,8 @@ TEST_P(QueryVisitorTest,
DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
// Document 1 has the term "foo" and its schema DOESN'T have the url property.
ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
@@ -3848,8 +3883,8 @@ TEST_P(QueryVisitorTest,
.Build()));
editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
/*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
std::string query = CreateQuery("foo AND NOT propertyDefined(\"url\")");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
@@ -3869,6 +3904,204 @@ TEST_P(QueryVisitorTest,
UnorderedElementsAre(kDocumentId1));
}
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithNoArgumentReturnsInvalidArgument) {
+ std::string query = "hasProperty()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithMoreThanOneStringArgumentReturnsInvalidArgument) {
+ std::string query = "hasProperty(\"foo\", \"bar\")";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithTextArgumentReturnsInvalidArgument) {
+ // The argument type is TEXT, not STRING here.
+ std::string query = "hasProperty(foo)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithNonStringArgumentReturnsInvalidArgument) {
+ std::string query = "hasProperty(1 < 2)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, HasPropertyFunctionReturnsMatchingDocuments) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Simple")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and has the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("Simple").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId0,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, "price").c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and doesn't have the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("Simple").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId0, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 2 has the term "bar" and has the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("Simple").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId0, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, "price").c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Test that `foo hasProperty("price")` matches document 0 only.
+ std::string query = CreateQuery("foo hasProperty(\"price\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor1(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor1);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor1).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kHasPropertyFunctionFeature,
+ kListFilterQueryLanguageFeature));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId0));
+
+ // Test that `bar OR NOT hasProperty("price")` matches document 1 and
+ // document 2.
+ query = CreateQuery("bar OR NOT hasProperty(\"price\")");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor2(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor2);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor2).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kHasPropertyFunctionFeature,
+ kListFilterQueryLanguageFeature));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId1, kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest,
+ HasPropertyFunctionReturnsNothingIfNoMatchingProperties) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Simple")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and has the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("Simple").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId0,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, "price").c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and doesn't have the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("Simple").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId0, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Attempt to query a non-existent property.
+ std::string query = CreateQuery("hasProperty(\"nonexistentproperty\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kHasPropertyFunctionFeature,
+ kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest,
testing::Values(QueryType::kPlain,
QueryType::kSearch));
diff --git a/icing/query/query-features.h b/icing/query/query-features.h
index 158e13e..d829cd7 100644
--- a/icing/query/query-features.h
+++ b/icing/query/query-features.h
@@ -48,9 +48,13 @@ constexpr Feature kVerbatimSearchFeature =
constexpr Feature kListFilterQueryLanguageFeature =
"LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE
+// This feature relates to the use of the "hasProperty(property_path)" function.
+constexpr Feature kHasPropertyFunctionFeature =
+ "HAS_PROPERTY_FUNCTION"; // Features#HAS_PROPERTY_FUNCTION
+
inline std::unordered_set<Feature> GetQueryFeaturesSet() {
return {kNumericSearchFeature, kVerbatimSearchFeature,
- kListFilterQueryLanguageFeature};
+ kListFilterQueryLanguageFeature, kHasPropertyFunctionFeature};
}
} // namespace lib
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 3e43ad9..bbfbf3c 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -176,12 +176,11 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
std::move(results.root_iterator), &document_store_, &schema_store_,
options, current_time_ms);
- // TODO(b/294114230): Move this SectionRestrict filter from root level to
- // lower levels if that would improve performance.
if (!search_spec.type_property_filters().empty()) {
- results.root_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
- std::move(results.root_iterator), &document_store_, &schema_store_,
- search_spec, current_time_ms);
+ results.root_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(results.root_iterator), &document_store_, &schema_store_,
+ search_spec, current_time_ms);
}
return results;
}
@@ -406,7 +405,7 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
// the section restrict
std::set<std::string> section_restricts;
section_restricts.insert(std::move(frames.top().section_restrict));
- result_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ result_iterator = DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::move(result_iterator), &document_store_, &schema_store_,
std::move(section_restricts), current_time_ms);
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index e64de32..53e3035 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -17,6 +17,7 @@
#include <cstdint>
#include <memory>
#include <string>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
@@ -36,6 +37,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-features.h"
+#include "icing/query/query-results.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
@@ -1099,7 +1101,7 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
IsOk());
EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
IsOk());
- index_->Merge();
+ ICING_ASSERT_OK(index_->Merge());
// Document 2 has content "animal kitten cat"
EXPECT_THAT(
@@ -3177,6 +3179,147 @@ TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
+TEST_P(QueryProcessorTest, GroupingInSectionRestriction) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP() << "Grouping in section restriction is only supported in "
+ "advanced query.";
+ }
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Create documents as follows:
+ // Doc0:
+ // prop1: "foo"
+ // prop2: "bar"
+ // Doc1:
+ // prop1: "bar"
+ // prop2: "foo"
+ // Doc2:
+ // prop1: "foo bar"
+ // prop2: ""
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "0")
+ .SetSchema("email")
+ .Build()));
+ EXPECT_THAT(
+ AddTokenToIndex(document_id0, prop1_section_id, term_match_type, "foo"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id0, prop2_section_id, term_match_type, "bar"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ EXPECT_THAT(
+ AddTokenToIndex(document_id1, prop1_section_id, term_match_type, "bar"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id1, prop2_section_id, term_match_type, "foo"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("email")
+ .Build()));
+ EXPECT_THAT(
+ AddTokenToIndex(document_id2, prop1_section_id, term_match_type, "foo"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id2, prop1_section_id, term_match_type, "bar"),
+ IsOk());
+
+ // prop1:(foo bar) <=> prop1:foo AND prop1:bar, which matches doc2.
+ SearchSpecProto search_spec;
+ search_spec.set_query("prop1:(foo bar)");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_id2, std::vector<SectionId>{prop1_section_id})));
+
+ // prop2:(foo bar) <=> prop2:foo AND prop2:bar, which matches nothing.
+ search_spec.set_query("prop2:(foo bar)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+
+ // prop1:(foo -bar) <=> prop1:foo AND -prop1:bar, which matches doc0.
+ search_spec.set_query("prop1:(foo -bar)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_id0, std::vector<SectionId>{prop1_section_id})));
+
+ // prop2:(-foo OR bar) <=> -prop2:foo OR prop2:bar, which matches doc0 and
+ // doc2.
+ search_spec.set_query("prop2:(-foo OR bar)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{}),
+ EqualsDocHitInfo(document_id0,
+ std::vector<SectionId>{prop2_section_id})));
+
+ // prop1:((foo AND bar) OR (foo AND -baz))
+ // <=> ((prop1:foo AND prop1:bar) OR (prop1:foo AND -prop1:baz)), which
+ // matches doc0 and doc2.
+ search_spec.set_query("prop1:((foo AND bar) OR (foo AND -baz))");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(document_id2,
+ std::vector<SectionId>{prop1_section_id}),
+ EqualsDocHitInfo(document_id0,
+ std::vector<SectionId>{prop1_section_id})));
+}
+
INSTANTIATE_TEST_SUITE_P(
QueryProcessorTest, QueryProcessorTest,
testing::Values(
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index a389d13..e17e388 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -448,7 +448,7 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
"Combined checksum of SchemaStore was inconsistent");
}
- BuildInMemoryCache();
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
return libtextclassifier3::Status::OK;
}
@@ -463,7 +463,7 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
ICING_RETURN_IF_ERROR(schema_type_mapper_->Put(
type_config.schema_type(), schema_type_mapper_->num_keys()));
}
- BuildInMemoryCache();
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
if (create_overlay_if_necessary) {
ICING_ASSIGN_OR_RETURN(
@@ -486,7 +486,7 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr)));
// LINT.IfChange(min_overlay_version_compatibility)
- // Although the current version is 2, the schema is compatible with
+ // Although the current version is 3, the schema is compatible with
// version 1, so min_overlay_version_compatibility should be 1.
int32_t min_overlay_version_compatibility = version_util::kVersionOne;
// LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
@@ -494,7 +494,7 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
/*overlay_created=*/true, min_overlay_version_compatibility);
// Rebuild in memory data - references to the old schema will be invalid
// now.
- BuildInMemoryCache();
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
}
}
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index af6feda..72287a8 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -189,6 +189,18 @@ bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
return false;
}
+// Check if set1 is a subset of set2.
+template <typename T>
+bool IsSubset(const std::unordered_set<T>& set1,
+ const std::unordered_set<T>& set2) {
+ for (const auto& item : set1) {
+ if (set2.find(item) == set2.end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
} // namespace
libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
@@ -929,31 +941,32 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
// TODO(cassiewang): consider caching property_config_map for some properties,
// e.g. using LRU cache. Or changing schema.proto to use go/protomap.
for (const PropertyConfigProto& property_config : type_config.properties()) {
- parsed_property_configs.property_config_map.emplace(
- property_config.property_name(), &property_config);
+ std::string_view property_name = property_config.property_name();
+ parsed_property_configs.property_config_map.emplace(property_name,
+ &property_config);
if (property_config.cardinality() ==
PropertyConfigProto::Cardinality::REQUIRED) {
- ++parsed_property_configs.num_required_properties;
+ parsed_property_configs.required_properties.insert(property_name);
}
// A non-default term_match_type indicates that this property is meant to be
// indexed.
if (IsIndexedProperty(property_config)) {
- ++parsed_property_configs.num_indexed_properties;
+ parsed_property_configs.indexed_properties.insert(property_name);
}
// A non-default value_type indicates that this property is meant to be
// joinable.
if (property_config.joinable_config().value_type() !=
JoinableConfig::ValueType::NONE) {
- ++parsed_property_configs.num_joinable_properties;
+ parsed_property_configs.joinable_properties.insert(property_name);
}
// Also keep track of how many nested document properties there are. Adding
// new nested document properties will result in join-index rebuild.
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
- ++parsed_property_configs.num_nested_document_properties;
+ parsed_property_configs.nested_document_properties.insert(property_name);
}
}
@@ -990,10 +1003,10 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// We only need to check the old, existing properties to see if they're
// compatible since we'll have old data that may be invalidated or need to
// be reindexed.
- int32_t old_required_properties = 0;
- int32_t old_indexed_properties = 0;
- int32_t old_joinable_properties = 0;
- int32_t old_nested_document_properties = 0;
+ std::unordered_set<std::string_view> old_required_properties;
+ std::unordered_set<std::string_view> old_indexed_properties;
+ std::unordered_set<std::string_view> old_joinable_properties;
+ std::unordered_set<std::string_view> old_nested_document_properties;
// If there is a different number of properties, then there must have been a
// change.
@@ -1004,23 +1017,24 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
bool is_index_incompatible = false;
bool is_join_incompatible = false;
for (const auto& old_property_config : old_type_config.properties()) {
+ std::string_view property_name = old_property_config.property_name();
if (old_property_config.cardinality() ==
PropertyConfigProto::Cardinality::REQUIRED) {
- ++old_required_properties;
+ old_required_properties.insert(property_name);
}
// A non-default term_match_type indicates that this property is meant to
// be indexed.
bool is_indexed_property = IsIndexedProperty(old_property_config);
if (is_indexed_property) {
- ++old_indexed_properties;
+ old_indexed_properties.insert(property_name);
}
bool is_joinable_property =
old_property_config.joinable_config().value_type() !=
JoinableConfig::ValueType::NONE;
if (is_joinable_property) {
- ++old_joinable_properties;
+ old_joinable_properties.insert(property_name);
}
// A nested-document property is a property of DataType::DOCUMENT.
@@ -1028,7 +1042,7 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
old_property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT;
if (is_nested_document_property) {
- ++old_nested_document_properties;
+ old_nested_document_properties.insert(property_name);
}
auto new_property_name_and_config =
@@ -1088,8 +1102,8 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// guaranteed from our previous checks that all the old properties are also
-  // present in the new property config, so we can do a simple int comparison
+  // present in the new property config, so we can do a subset comparison
// here to detect new required properties.
- if (new_parsed_property_configs.num_required_properties >
- old_required_properties) {
+ if (!IsSubset(new_parsed_property_configs.required_properties,
+ old_required_properties)) {
ICING_VLOG(1) << absl_ports::StrCat(
"New schema '", old_type_config.schema_type(),
"' has REQUIRED properties that are not "
@@ -1101,8 +1115,8 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// indexed nested document properties), then the section ids may change.
// Since the section ids are stored in the index, we'll need to
// reindex everything.
- if (new_parsed_property_configs.num_indexed_properties >
- old_indexed_properties) {
+ if (!IsSubset(new_parsed_property_configs.indexed_properties,
+ old_indexed_properties)) {
ICING_VLOG(1) << "Set of indexed properties in schema type '"
<< old_type_config.schema_type()
<< "' has changed, required reindexing.";
@@ -1116,10 +1130,10 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// join index. This is because we index all nested joinable properties, so
// adding a nested document property will most probably result in having
// more joinable properties.
- if (new_parsed_property_configs.num_joinable_properties >
- old_joinable_properties ||
- new_parsed_property_configs.num_nested_document_properties >
- old_nested_document_properties) {
+ if (!IsSubset(new_parsed_property_configs.joinable_properties,
+ old_joinable_properties) ||
+ !IsSubset(new_parsed_property_configs.nested_document_properties,
+ old_nested_document_properties)) {
ICING_VLOG(1) << "Set of joinable properties in schema type '"
<< old_type_config.schema_type()
<< "' has changed, required reconstructing joinable cache.";
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index 6d0ff73..4f09915 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -113,17 +113,17 @@ class SchemaUtil {
std::unordered_map<std::string_view, const PropertyConfigProto*>
property_config_map;
- // Total number of properties that have an indexing config
- int32_t num_indexed_properties = 0;
+ // Properties that have an indexing config
+ std::unordered_set<std::string_view> indexed_properties;
- // Total number of properties that were REQUIRED
- int32_t num_required_properties = 0;
+ // Properties that were REQUIRED
+ std::unordered_set<std::string_view> required_properties;
- // Total number of properties that have joinable config
- int32_t num_joinable_properties = 0;
+ // Properties that have joinable config
+ std::unordered_set<std::string_view> joinable_properties;
- // Total number of properties that have DataType::DOCUMENT
- int32_t num_nested_document_properties = 0;
+ // Properties that have DataType::DOCUMENT
+ std::unordered_set<std::string_view> nested_document_properties;
};
// This function validates:
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index 564bbc0..82683ba 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -2564,6 +2564,114 @@ TEST_P(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty());
}
+TEST_P(SchemaUtilTest, SameNumberOfRequiredFieldsCanBeIncompatible) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ // Changing required to optional should be fine
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataType(TYPE_STRING)
+ // Adding a new required property is incompatible
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, /*new_schema_dependent_map=*/{});
+ EXPECT_THAT(delta.schema_types_incompatible,
+ testing::ElementsAre(kEmailType));
+ EXPECT_THAT(delta.schema_types_index_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_deleted, testing::IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SameNumberOfIndexedPropertiesCanMakeIndexIncompatible) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, /*new_schema_dependent_map=*/{});
+ EXPECT_THAT(delta.schema_types_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_index_incompatible,
+ testing::ElementsAre(kEmailType));
+ EXPECT_THAT(delta.schema_types_deleted, testing::IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SameNumberOfJoinablePropertiesCanMakeJoinIncompatible) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, /*new_schema_dependent_map=*/{});
+ EXPECT_THAT(delta.schema_types_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_index_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_deleted, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_join_incompatible,
+ testing::ElementsAre(kEmailType));
+}
+
TEST_P(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
// Configure old schema
SchemaProto schema_with_indexed_property =
@@ -3017,8 +3125,7 @@ TEST_P(SchemaUtilTest, DeletingIndexedDocumentPropertyIsIncompatible) {
EXPECT_THAT(result_schema_delta, Eq(schema_delta));
}
-TEST_P(SchemaUtilTest,
- DeletingNonIndexedDocumentPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, DeletingNonIndexedDocumentPropertyIsIncompatible) {
SchemaTypeConfigProto nested_schema =
SchemaTypeConfigBuilder()
.SetType(kEmailType)
diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc
index 8284426..b827bd8 100644
--- a/icing/scoring/scoring-processor.cc
+++ b/icing/scoring/scoring-processor.cc
@@ -14,7 +14,9 @@
#include "icing/scoring/scoring-processor.h"
+#include <limits>
#include <memory>
+#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
@@ -68,7 +70,8 @@ ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
std::vector<ScoredDocumentHit> ScoringProcessor::Score(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator, int num_to_score,
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
- query_term_iterators) {
+ query_term_iterators,
+ QueryStatsProto::SearchStats* search_stats) {
std::vector<ScoredDocumentHit> scored_document_hits;
scorer_->PrepareToScore(query_term_iterators);
@@ -85,6 +88,18 @@ std::vector<ScoredDocumentHit> ScoringProcessor::Score(
doc_hit_info.document_id(), doc_hit_info.hit_section_ids_mask(), score);
}
+ if (search_stats != nullptr) {
+ search_stats->set_num_documents_scored(scored_document_hits.size());
+ DocHitInfoIterator::CallStats iterator_call_stats =
+ doc_hit_info_iterator->GetCallStats();
+ search_stats->set_num_fetched_hits_lite_index(
+ iterator_call_stats.num_leaf_advance_calls_lite_index);
+ search_stats->set_num_fetched_hits_main_index(
+ iterator_call_stats.num_leaf_advance_calls_main_index);
+ search_stats->set_num_fetched_hits_integer_index(
+ iterator_call_stats.num_leaf_advance_calls_integer_index);
+ }
+
return scored_document_hits;
}
diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h
index e9efda7..8634a22 100644
--- a/icing/scoring/scoring-processor.h
+++ b/icing/scoring/scoring-processor.h
@@ -15,14 +15,19 @@
#ifndef ICING_SCORING_SCORING_PROCESSOR_H_
#define ICING_SCORING_SCORING_PROCESSOR_H_
+#include <cstdint>
#include <memory>
+#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/join/join-children-fetcher.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/scoring/scorer.h"
#include "icing/store/document-store.h"
@@ -57,7 +62,8 @@ class ScoringProcessor {
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
int num_to_score,
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
- query_term_iterators = nullptr);
+ query_term_iterators = nullptr,
+ QueryStatsProto::SearchStats* search_stats = nullptr);
private:
explicit ScoringProcessor(std::unique_ptr<Scorer> scorer)
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 30de410..094eea1 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -53,6 +53,7 @@
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/store/usage-store.h"
@@ -142,25 +143,6 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
}
-// This function will encode a namespace id into a fixed 3 bytes string.
-std::string EncodeNamespaceId(NamespaceId namespace_id) {
- // encoding should be 1 to 3 bytes based on the value of namespace_id.
- std::string encoding = encode_util::EncodeIntToCString(namespace_id);
- // Make encoding to fixed 3 bytes.
- while (encoding.size() < 3) {
- // DynamicTrie cannot handle keys with 0 as bytes, so we append it using 1,
- // just like what we do in encode_util::EncodeIntToCString.
- //
- // The reason that this works is because DecodeIntToString decodes a byte
- // value of 0x01 as 0x00. When EncodeIntToCString returns a namespaceid
- // encoding that is less than 3 bytes, it means that the id contains
- // unencoded leading 0x00. So here we're explicitly encoding those bytes as
- // 0x01.
- encoding.push_back(1);
- }
- return encoding;
-}
-
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
int64_t ttl_ms) {
if (ttl_ms == 0) {
@@ -269,9 +251,8 @@ std::string DocumentStore::MakeFingerprint(
absl_ports::StrCat(namespace_, uri_or_schema));
return fingerprint_util::GetFingerprintString(fprint);
} else {
- return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
- encode_util::EncodeIntToCString(
- tc3farmhash::Fingerprint64(uri_or_schema)));
+ return NamespaceFingerprintIdentifier(namespace_id, uri_or_schema)
+ .EncodeToCString();
}
}
@@ -328,13 +309,15 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
filesystem, base_dir, clock, schema_store, namespace_id_fingerprint,
pre_mapping_fbv, use_persistent_hash_map, compression_level));
ICING_ASSIGN_OR_RETURN(
- DataLoss data_loss,
+ InitializeResult initialize_result,
document_store->Initialize(force_recovery_and_revalidate_documents,
initialize_stats));
CreateResult create_result;
create_result.document_store = std::move(document_store);
- create_result.data_loss = data_loss;
+ create_result.data_loss = initialize_result.data_loss;
+ create_result.derived_files_regenerated =
+ initialize_result.derived_files_regenerated;
return create_result;
}
@@ -380,9 +363,9 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
- bool force_recovery_and_revalidate_documents,
- InitializeStatsProto* initialize_stats) {
+libtextclassifier3::StatusOr<DocumentStore::InitializeResult>
+DocumentStore::Initialize(bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
auto create_result_or =
DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_);
@@ -400,6 +383,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
InitializeStatsProto::RecoveryCause recovery_cause =
GetRecoveryCause(create_result, force_recovery_and_revalidate_documents);
+ bool derived_files_regenerated = false;
if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) {
ICING_LOG(INFO) << "Starting Document Store Recovery with cause="
<< recovery_cause << ", and create result { new_file="
@@ -416,16 +400,18 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status =
RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
- if (initialize_stats != nullptr &&
- recovery_cause != InitializeStatsProto::NONE) {
+ if (recovery_cause != InitializeStatsProto::NONE) {
// Only consider it a recovery if the client forced a recovery or there
// was data loss. Otherwise, this could just be the first time we're
// initializing and generating derived files.
- initialize_stats->set_document_store_recovery_latency_ms(
- document_recovery_timer->GetElapsedMilliseconds());
- initialize_stats->set_document_store_recovery_cause(recovery_cause);
- initialize_stats->set_document_store_data_status(
- GetDataStatus(create_result.log_create_result.data_loss));
+ derived_files_regenerated = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_latency_ms(
+ document_recovery_timer->GetElapsedMilliseconds());
+ initialize_stats->set_document_store_recovery_cause(recovery_cause);
+ initialize_stats->set_document_store_data_status(
+ GetDataStatus(create_result.log_create_result.data_loss));
+ }
}
if (!status.ok()) {
ICING_LOG(ERROR)
@@ -438,6 +424,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
+ derived_files_regenerated = true;
libtextclassifier3::Status status = RegenerateDerivedFiles(
/*force_recovery_and_revalidate_documents=*/false);
if (initialize_stats != nullptr) {
@@ -459,7 +446,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
initialize_stats->set_num_documents(document_id_mapper_->num_elements());
}
- return create_result.log_create_result.data_loss;
+ InitializeResult initialize_result = {
+ .data_loss = create_result.log_create_result.data_loss,
+ .derived_files_regenerated = derived_files_regenerated};
+ return initialize_result;
}
libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
@@ -1177,6 +1167,25 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
"Failed to find DocumentId by key: ", name_space, ", ", uri));
}
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
+ const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier)
+ const {
+ if (!namespace_id_fingerprint_) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot lookup document id by namespace id + fingerprint without "
+ "enabling it on uri_mapper");
+ }
+
+ auto document_id_or = document_key_mapper_->Get(
+ namespace_fingerprint_identifier.EncodeToCString());
+ if (document_id_or.ok()) {
+ return document_id_or.ValueOrDie();
+ }
+ return absl_ports::Annotate(
+ std::move(document_id_or).status(),
+ "Failed to find DocumentId by namespace id + fingerprint");
+}
+
std::vector<std::string> DocumentStore::GetAllNamespaces() const {
std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
GetNamespaceIdsToNamespaces(namespace_mapper_.get());
@@ -1829,10 +1838,10 @@ libtextclassifier3::Status DocumentStore::Optimize() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<std::vector<DocumentId>>
+libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
DocumentStore::OptimizeInto(const std::string& new_directory,
const LanguageSegmenter* lang_segmenter,
- OptimizeStatsProto* stats) {
+ OptimizeStatsProto* stats) const {
// Validates directory
if (new_directory == base_dir_) {
return absl_ports::InvalidArgumentError(
@@ -1850,20 +1859,22 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
std::move(doc_store_create_result.document_store);
// Writes all valid docs into new document store (new directory)
- int size = document_id_mapper_->num_elements();
- int num_deleted = 0;
- int num_expired = 0;
+ int document_cnt = document_id_mapper_->num_elements();
+ int num_deleted_documents = 0;
+ int num_expired_documents = 0;
UsageStore::UsageScores default_usage;
- std::vector<DocumentId> document_id_old_to_new(size, kInvalidDocumentId);
+
+ OptimizeResult result;
+ result.document_id_old_to_new.resize(document_cnt, kInvalidDocumentId);
int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
- for (DocumentId document_id = 0; document_id < size; document_id++) {
+ for (DocumentId document_id = 0; document_id < document_cnt; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
if (IsDeleted(document_id)) {
- ++num_deleted;
+ ++num_deleted_documents;
} else if (!GetNonExpiredDocumentFilterData(document_id,
current_time_ms)) {
- ++num_expired;
+ ++num_expired_documents;
}
continue;
} else if (!document_or.ok()) {
@@ -1903,7 +1914,8 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
return new_document_id_or.status();
}
- document_id_old_to_new[document_id] = new_document_id_or.ValueOrDie();
+ result.document_id_old_to_new[document_id] =
+ new_document_id_or.ValueOrDie();
// Copy over usage scores.
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
@@ -1917,13 +1929,61 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
}
+
+ // Construct namespace_id_old_to_new
+ int namespace_cnt = namespace_mapper_->num_keys();
+ std::unordered_map<NamespaceId, std::string> old_namespaces =
+ GetNamespaceIdsToNamespaces(namespace_mapper_.get());
+ if (namespace_cnt != old_namespaces.size()) {
+ // This really shouldn't happen. If it really happens, then:
+ // - It won't block DocumentStore optimization, so don't return error here.
+ // - Instead, write a warning log here and hint the caller to rebuild index.
+ ICING_LOG(WARNING) << "Unexpected old namespace count " << namespace_cnt
+ << " vs " << old_namespaces.size();
+ result.should_rebuild_index = true;
+ } else {
+ result.namespace_id_old_to_new.resize(namespace_cnt, kInvalidNamespaceId);
+ for (const auto& [old_namespace_id, ns] : old_namespaces) {
+ if (old_namespace_id >= result.namespace_id_old_to_new.size()) {
+ // This really shouldn't happen. If it really happens, then:
+ // - It won't block DocumentStore optimization, so don't return error
+ // here.
+ // - Instead, write a warning log here and hint the caller to rebuild
+ // index.
+ ICING_LOG(WARNING) << "Found unexpected namespace id "
+ << old_namespace_id << ". Should be in range 0 to "
+ << result.namespace_id_old_to_new.size()
+ << " (exclusive).";
+ result.namespace_id_old_to_new.clear();
+ result.should_rebuild_index = true;
+ break;
+ }
+
+ auto new_namespace_id_or = new_doc_store->namespace_mapper_->Get(ns);
+ if (!new_namespace_id_or.ok()) {
+ if (absl_ports::IsNotFound(new_namespace_id_or.status())) {
+ continue;
+ }
+ // Real error, return it.
+ return std::move(new_namespace_id_or).status();
+ }
+
+ NamespaceId new_namespace_id = new_namespace_id_or.ValueOrDie();
+ // Safe to use bracket to assign given that we've checked the range above.
+ result.namespace_id_old_to_new[old_namespace_id] = new_namespace_id;
+ }
+ }
+
if (stats != nullptr) {
- stats->set_num_original_documents(size);
- stats->set_num_deleted_documents(num_deleted);
- stats->set_num_expired_documents(num_expired);
+ stats->set_num_original_documents(document_cnt);
+ stats->set_num_deleted_documents(num_deleted_documents);
+ stats->set_num_expired_documents(num_expired_documents);
+ stats->set_num_original_namespaces(namespace_cnt);
+ stats->set_num_deleted_namespaces(
+ namespace_cnt - new_doc_store->namespace_mapper_->num_keys());
}
ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL));
- return document_id_old_to_new;
+ return result;
}
libtextclassifier3::StatusOr<DocumentStore::OptimizeInfo>
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 92d4286..c228e8b 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -43,6 +43,7 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/store/usage-store.h"
#include "icing/tokenization/language-segmenter.h"
@@ -106,6 +107,11 @@ class DocumentStore {
// unpersisted. This may be used to signal that any derived data off of the
// document store may need to be regenerated.
DataLoss data_loss;
+
+ // A boolean flag indicating if derived files of the document store have
+ // been regenerated or not. This is usually a signal for callers to detect
+ // if any id assignment has changed (e.g. NamespaceId).
+ bool derived_files_regenerated;
};
// Not copyable
@@ -270,6 +276,21 @@ class DocumentStore {
libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
std::string_view name_space, std::string_view uri) const;
+ // Helper method to find a DocumentId that is associated with the given
+ // NamespaceFingerprintIdentifier.
+ //
+ // NOTE: The DocumentId may refer to a invalid document (deleted
+ // or expired). Callers can call DoesDocumentExist(document_id) to ensure it
+ // refers to a valid Document.
+ //
+ // Returns:
+ // A DocumentId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
+ const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier)
+ const;
+
// Returns the CorpusId associated with the given namespace and schema.
//
// Returns:
@@ -439,10 +460,23 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::Status Optimize();
+ struct OptimizeResult {
+ // A vector that maps old document id to new document id.
+ std::vector<DocumentId> document_id_old_to_new;
+
+ // A vector that maps old namespace id to new namespace id. Will be empty if
+ // should_rebuild_index is set to true.
+ std::vector<NamespaceId> namespace_id_old_to_new;
+
+ // A boolean flag that hints the caller (usually IcingSearchEngine) if it
+ // should rebuild index instead of adopting the id changes via the 2 vectors
+ // above. It will be set to true if finding any id inconsistency.
+ bool should_rebuild_index = false;
+ };
// Copy data from current base directory into a new directory. Any outdated or
- // deleted data won't be copied. During the process, document ids will be
- // reassigned so any files / classes that are based on old document ids may be
- // outdated.
+ // deleted data won't be copied. During the process, document/namespace ids
+ // will be reassigned so any files / classes that are based on old
+ // document/namespace ids may be outdated.
//
// stats will be set if non-null.
//
@@ -451,12 +485,14 @@ class DocumentStore {
// method based on device usage.
//
// Returns:
- // A vector that maps from old document id to new document id on success
+ // OptimizeResult which contains a vector mapping from old document id to
+ // new document id and another vector mapping from old namespace id to new
+ // namespace id, on success
// INVALID_ARGUMENT if new_directory is same as current base directory
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeInto(
+ libtextclassifier3::StatusOr<OptimizeResult> OptimizeInto(
const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
- OptimizeStatsProto* stats = nullptr);
+ OptimizeStatsProto* stats = nullptr) const;
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -580,7 +616,15 @@ class DocumentStore {
// worry about this field.
bool initialized_ = false;
- libtextclassifier3::StatusOr<DataLoss> Initialize(
+ struct InitializeResult {
+ DataLoss data_loss;
+
+ // A boolean flag indicating if derived files of the document store have
+ // been regenerated or not. This is usually a signal for callers to detect
+ // if any id assignment has changed (e.g. NamespaceId).
+ bool derived_files_regenerated;
+ };
+ libtextclassifier3::StatusOr<InitializeResult> Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats);
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index 5b9c568..46d76d8 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -163,8 +163,9 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
// stuff.
ICING_ASSERT_OK(document_store->Put(
CreateDocument("namespace", /*uri=*/std::to_string(i))));
- document_store->Delete("namespace", /*uri=*/std::to_string(i),
- clock.GetSystemTimeMilliseconds());
+ ICING_ASSERT_OK(document_store->Delete("namespace",
+ /*uri=*/std::to_string(i),
+ clock.GetSystemTimeMilliseconds()));
}
std::default_random_engine random;
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index a9c47f0..2d4cd99 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -47,6 +47,7 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -1050,7 +1051,7 @@ TEST_P(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_P(DocumentStoreTest, OptimizeInto) {
+TEST_P(DocumentStoreTest, OptimizeIntoSingleNamespace) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1103,24 +1104,33 @@ TEST_P(DocumentStoreTest, OptimizeInto) {
optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
// Validates that the optimized document log has the same size if nothing is
- // deleted
+ // deleted. Also namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(0, 1, 2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result1,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result1.document_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse());
int64_t optimized_size1 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_EQ(original_size, optimized_size1);
// Validates that the optimized document log has a smaller size if something
- // is deleted
+ // is deleted. Namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1",
fake_clock_.GetSystemTimeMilliseconds()));
// DocumentId 0 is removed.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result2,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result2.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, 1));
+ EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse());
int64_t optimized_size2 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(original_size, Gt(optimized_size2));
@@ -1130,13 +1140,17 @@ TEST_P(DocumentStoreTest, OptimizeInto) {
fake_clock_.SetSystemTimeMilliseconds(300);
// Validates that the optimized document log has a smaller size if something
- // expired
+ // expired. Namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
// DocumentId 0 is removed, and DocumentId 2 is expired.
- EXPECT_THAT(
- doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result3,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result3.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse());
int64_t optimized_size3 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(optimized_size2, Gt(optimized_size3));
@@ -1146,15 +1160,228 @@ TEST_P(DocumentStoreTest, OptimizeInto) {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2",
fake_clock_.GetSystemTimeMilliseconds()));
- // DocumentId 0 and 1 is removed, and DocumentId 2 is expired.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId,
- kInvalidDocumentId)));
+ // DocumentId 0 and 1 is removed, and DocumentId 2 is expired. Since no
+ // document with the namespace is added into new document store, the namespace
+ // id will be invalid.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result4,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(
+ optimize_result4.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result4.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse());
int64_t optimized_size4 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(optimized_size3, Gt(optimized_size4));
}
+TEST_P(DocumentStoreTest, OptimizeIntoMultipleNamespaces) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("namespace1", "uri0")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace3", "uri4")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ // Nothing should have expired yet.
+ fake_clock_.SetSystemTimeMilliseconds(100);
+
+ ICING_ASSERT_OK(doc_store->Put(document0));
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+ ICING_ASSERT_OK(doc_store->Put(document3));
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+ std::string original_document_log = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
+ int64_t original_size =
+ filesystem_.GetFileSize(original_document_log.c_str());
+
+ std::string optimized_dir = document_store_dir_ + "_optimize";
+ std::string optimized_document_log =
+ optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
+
+ // Validates that the optimized document log has the same size if nothing is
+ // deleted. Also namespace ids remain the same.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result1,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result1.document_id_old_to_new,
+ ElementsAre(0, 1, 2, 3, 4));
+ EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse());
+ int64_t optimized_size1 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_EQ(original_size, optimized_size1);
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 0 with namespace1.
+ // - Before: ["namespace1#uri0", "namespace1#uri1", "namespace2#uri2",
+ // "namespace1#uri3", "namespace3#uri4"]
+ // - After: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace1",
+ // "namespace2", "namespace3"] order. Since new_doc_store has the same order
+ // of namespace id assignment, namespace ids remain the same.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri0",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result2,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result2.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, 1, 2, 3));
+ EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse());
+ int64_t optimized_size2 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(original_size, Gt(optimized_size2));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 1 with namespace1.
+ // - Before: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2",
+ // "namespace1", "namespace3"] order, so namespace_id_old_to_new should
+ // reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result3,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result3.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0, 1, 2));
+ EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(1, 0, 2));
+ EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse());
+ int64_t optimized_size3 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size2, Gt(optimized_size3));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 3 with namespace1.
+ // - Before: [nil, nil, "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2",
+ // "namespace3"] order and "namespace1" will be never assigned, so
+ // namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri3",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result4,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result4.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0,
+ kInvalidDocumentId, 1));
+ EXPECT_THAT(optimize_result4.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, 0, 1));
+ EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse());
+ int64_t optimized_size4 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size3, Gt(optimized_size4));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 4 with namespace3.
+ // - Before: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", nil, nil]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2"]
+ // order and "namespace1", "namespace3" will be never assigned, so
+ // namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace3", "uri4",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result5,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result5.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0,
+ kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result5.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, 0, kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result5.should_rebuild_index, IsFalse());
+ int64_t optimized_size5 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size4, Gt(optimized_size5));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 2 with namespace2.
+ // - Before: [nil, nil, "namespace2#uri2", nil, nil]
+ // - After: [nil, nil, nil, nil, nil]
+ // In this case, all documents were deleted, so there will be no namespace ids
+ // either. namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace2", "uri2",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result6,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(
+ optimize_result6.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId,
+ kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result6.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, kInvalidNamespaceId,
+ kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result6.should_rebuild_index, IsFalse());
+ int64_t optimized_size6 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size5, Gt(optimized_size6));
+}
+
TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1165,8 +1392,13 @@ TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
std::string optimized_dir = document_store_dir_ + "_optimize";
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(IsEmpty()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result.document_id_old_to_new, IsEmpty());
+ EXPECT_THAT(optimize_result.namespace_id_old_to_new, IsEmpty());
+ EXPECT_THAT(optimize_result.should_rebuild_index, IsFalse());
}
TEST_P(DocumentStoreTest, ShouldRecoverFromDataLoss) {
@@ -3427,6 +3659,7 @@ TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsFalse());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(DocumentProto(test_document1_)));
@@ -3455,7 +3688,8 @@ TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
}
TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
@@ -3471,6 +3705,7 @@ TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsFalse());
// There's some space at the beginning of the file (e.g. header, kmagic,
// etc) that is necessary to initialize the FileBackedProtoLog. We can't
@@ -3520,7 +3755,8 @@ TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
}
TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
@@ -3573,8 +3809,12 @@ TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
std::move(create_result.document_store);
// The document log is using the legacy v0 format so that a migration is
// needed, which will also trigger regeneration.
- EXPECT_EQ(initialize_stats.document_store_recovery_cause(),
- InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT));
+ // There should be no data loss, but we still need to regenerate derived files
+ // since we migrated document log from v0 to v1.
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
}
TEST_P(DocumentStoreTest, DocumentStoreStorageInfo) {
@@ -4227,8 +4467,10 @@ TEST_P(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
.Build();
// Check that we didn't lose anything. A migration also doesn't technically
- // count as a recovery.
+ // count as data loss, but we still have to regenerate derived files after
+ // migration.
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
EXPECT_EQ(initialize_stats.document_store_recovery_cause(),
InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
@@ -4582,6 +4824,46 @@ TEST_P(DocumentStoreTest, SameKeyMapperTypeShouldNotRegenerateDerivedFiles) {
}
}
+TEST_P(DocumentStoreTest, GetDocumentIdByNamespaceFingerprintIdentifier) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId namespace_id,
+ doc_store->GetNamespaceId(test_document1_.namespace_()));
+ NamespaceFingerprintIdentifier ns_fingerprint(
+ namespace_id,
+ /*target_str=*/test_document1_.uri());
+ if (GetParam().namespace_id_fingerprint) {
+ EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint),
+ IsOkAndHolds(document_id));
+
+ NamespaceFingerprintIdentifier non_existing_ns_fingerprint(
+ namespace_id + 1, /*target_str=*/test_document1_.uri());
+ EXPECT_THAT(doc_store->GetDocumentId(non_existing_ns_fingerprint),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ } else {
+ EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
INSTANTIATE_TEST_SUITE_P(
DocumentStoreTest, DocumentStoreTest,
testing::Values(
diff --git a/icing/store/namespace-fingerprint-identifier.cc b/icing/store/namespace-fingerprint-identifier.cc
new file mode 100644
index 0000000..3910105
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier.cc
@@ -0,0 +1,73 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/namespace-fingerprint-identifier.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/encode-util.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier>
+NamespaceFingerprintIdentifier::DecodeFromCString(
+ std::string_view encoded_cstr) {
+ if (encoded_cstr.size() < kMinEncodedLength) {
+ return absl_ports::InvalidArgumentError("Invalid length");
+ }
+
+ NamespaceId namespace_id = encode_util::DecodeIntFromCString(
+ encoded_cstr.substr(0, kEncodedNamespaceIdLength));
+ uint64_t fingerprint = encode_util::DecodeIntFromCString(
+ encoded_cstr.substr(kEncodedNamespaceIdLength));
+ return NamespaceFingerprintIdentifier(namespace_id, fingerprint);
+}
+
+NamespaceFingerprintIdentifier::NamespaceFingerprintIdentifier(
+ NamespaceId namespace_id, std::string_view target_str)
+ : namespace_id_(namespace_id),
+ fingerprint_(tc3farmhash::Fingerprint64(target_str)) {}
+
+std::string NamespaceFingerprintIdentifier::EncodeToCString() const {
+ // encoded_namespace_id_str should be 1 to 3 bytes based on the value of
+ // namespace_id.
+ std::string encoded_namespace_id_str =
+ encode_util::EncodeIntToCString(namespace_id_);
+ // Make encoded_namespace_id_str to fixed kEncodedNamespaceIdLength bytes.
+ while (encoded_namespace_id_str.size() < kEncodedNamespaceIdLength) {
+ // C string cannot contain 0 bytes, so we append it using 1, just like what
+ // we do in encode_util::EncodeIntToCString.
+ //
+ // The reason that this works is because DecodeIntToString decodes a byte
+ // value of 0x01 as 0x00. When EncodeIntToCString returns an encoded
+ // namespace id that is less than 3 bytes, it means that the id contains
+ // unencoded leading 0x00. So here we're explicitly encoding those bytes as
+ // 0x01.
+ encoded_namespace_id_str.push_back(1);
+ }
+
+ return absl_ports::StrCat(encoded_namespace_id_str,
+ encode_util::EncodeIntToCString(fingerprint_));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/namespace-fingerprint-identifier.h b/icing/store/namespace-fingerprint-identifier.h
new file mode 100644
index 0000000..d91ef94
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
+#define ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/store/namespace-id.h"
+
+namespace icing {
+namespace lib {
+
+class NamespaceFingerprintIdentifier {
+ public:
+ static constexpr int kEncodedNamespaceIdLength = 3;
+ static constexpr int kMinEncodedLength = kEncodedNamespaceIdLength + 1;
+
+ static libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier>
+ DecodeFromCString(std::string_view encoded_cstr);
+
+ explicit NamespaceFingerprintIdentifier()
+ : namespace_id_(0), fingerprint_(0) {}
+
+ explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id,
+ uint64_t fingerprint)
+ : namespace_id_(namespace_id), fingerprint_(fingerprint) {}
+
+ explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id,
+ std::string_view target_str);
+
+ std::string EncodeToCString() const;
+
+ bool operator<(const NamespaceFingerprintIdentifier& other) const {
+ if (namespace_id_ != other.namespace_id_) {
+ return namespace_id_ < other.namespace_id_;
+ }
+ return fingerprint_ < other.fingerprint_;
+ }
+
+ bool operator==(const NamespaceFingerprintIdentifier& other) const {
+ return namespace_id_ == other.namespace_id_ &&
+ fingerprint_ == other.fingerprint_;
+ }
+
+ NamespaceId namespace_id() const { return namespace_id_; }
+ uint64_t fingerprint() const { return fingerprint_; }
+
+ private:
+ NamespaceId namespace_id_;
+ uint64_t fingerprint_;
+} __attribute__((packed));
+static_assert(sizeof(NamespaceFingerprintIdentifier) == 10, "");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
diff --git a/icing/store/namespace-fingerprint-identifier_test.cc b/icing/store/namespace-fingerprint-identifier_test.cc
new file mode 100644
index 0000000..5f86156
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier_test.cc
@@ -0,0 +1,148 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/namespace-fingerprint-identifier.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(NamespaceFingerprintIdentifierTest, EncodeToCString) {
+ NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0,
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier1.EncodeToCString(), Eq("\x01\x01\x01\x01"));
+
+ NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0,
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier2.EncodeToCString(), Eq("\x01\x01\x01\x02"));
+
+ NamespaceFingerprintIdentifier identifier3(
+ /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier3.EncodeToCString(),
+ Eq("\x01\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+
+ NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1,
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier4.EncodeToCString(), Eq("\x02\x01\x01\x01"));
+
+ NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1,
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier5.EncodeToCString(), Eq("\x02\x01\x01\x02"));
+
+ NamespaceFingerprintIdentifier identifier6(
+ /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier6.EncodeToCString(),
+ Eq("\x02\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+
+ NamespaceFingerprintIdentifier identifier7(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier7.EncodeToCString(), Eq("\x80\x80\x02\x01"));
+
+ NamespaceFingerprintIdentifier identifier8(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier8.EncodeToCString(), Eq("\x80\x80\x02\x02"));
+
+ NamespaceFingerprintIdentifier identifier9(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier9.EncodeToCString(),
+ Eq("\x80\x80\x02\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+}
+
+TEST(NamespaceFingerprintIdentifierTest,
+ MultipleCStringConversionsAreReversible) {
+ NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0,
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier1.EncodeToCString()),
+ IsOkAndHolds(identifier1));
+
+ NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0,
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier2.EncodeToCString()),
+ IsOkAndHolds(identifier2));
+
+ NamespaceFingerprintIdentifier identifier3(
+ /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier3.EncodeToCString()),
+ IsOkAndHolds(identifier3));
+
+ NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1,
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier4.EncodeToCString()),
+ IsOkAndHolds(identifier4));
+
+ NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1,
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier5.EncodeToCString()),
+ IsOkAndHolds(identifier5));
+
+ NamespaceFingerprintIdentifier identifier6(
+ /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier6.EncodeToCString()),
+ IsOkAndHolds(identifier6));
+
+ NamespaceFingerprintIdentifier identifier7(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier7.EncodeToCString()),
+ IsOkAndHolds(identifier7));
+
+ NamespaceFingerprintIdentifier identifier8(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier8.EncodeToCString()),
+ IsOkAndHolds(identifier8));
+
+ NamespaceFingerprintIdentifier identifier9(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier9.EncodeToCString()),
+ IsOkAndHolds(identifier9));
+}
+
+TEST(NamespaceFingerprintIdentifierTest,
+ DecodeFromCStringInvalidLengthShouldReturnError) {
+ std::string invalid_str = "\x01\x01\x01";
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(invalid_str),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 2b17f13..07fe2c5 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -154,7 +154,8 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) {
UsageStore::Create(&filesystem_, test_dir_));
// Report a usage with timestamp 5.
- usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1));
UsageStore::UsageScores expected_scores = CreateUsageScores(
/*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
/*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
@@ -162,13 +163,15 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) {
IsOkAndHolds(expected_scores));
// Report a usage with timestamp 1. The timestamp won't be updated.
- usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1));
++expected_scores.usage_type1_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report a usage with timestamp 10. The timestamp should be updated.
- usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1));
expected_scores.usage_type1_last_used_timestamp_s = 10;
++expected_scores.usage_type1_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
@@ -188,7 +191,8 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) {
UsageStore::Create(&filesystem_, test_dir_));
// Report a usage with type 1.
- usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1));
UsageStore::UsageScores expected_scores = CreateUsageScores(
/*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
/*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
@@ -196,29 +200,34 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) {
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 1.
- usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1));
++expected_scores.usage_type1_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report a usage with type 2.
- usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1));
++expected_scores.usage_type2_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 2.
- usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1));
++expected_scores.usage_type2_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report a usage with type 3.
- usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1));
++expected_scores.usage_type3_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 3.
- usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1));
++expected_scores.usage_type3_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
@@ -457,7 +466,7 @@ TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
UsageStore::Create(&filesystem_, test_dir_));
// The stored timestamp in seconds should be the max value of uint32.
- usage_store->AddUsageReport(usage_report, /*document_id=*/1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
UsageStore::UsageScores expected_scores = CreateUsageScores(
/*type1_timestamp=*/std::numeric_limits<uint32_t>::max(),
/*type2_timestamp=*/0, /*type3_timestamp=*/0,
@@ -483,7 +492,7 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) {
// Report another usage with type 1.
UsageReport usage_report = CreateUsageReport(
"namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1);
- usage_store->AddUsageReport(usage_report, /*document_id=*/1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
// usage_type1_count should not change because it's already the max value.
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
@@ -571,7 +580,7 @@ TEST_F(UsageStoreTest, GetElementsFileSize) {
UsageReport usage_report = CreateUsageReport(
"namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
- usage_store->AddUsageReport(usage_report, /*document_id=*/1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
EXPECT_THAT(usage_store->GetElementsFileSize(),
IsOkAndHolds(Gt(empty_file_size)));
@@ -602,12 +611,13 @@ TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) {
UsageReport usage_report = CreateUsageReport(
"namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
for (int i = 0; i < 200; ++i) {
- usage_store->AddUsageReport(usage_report, /*document_id=*/i);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report, /*document_id=*/i));
}
// We need to persist since iOS won't see the new disk allocations until after
// everything gets written.
- usage_store->PersistToDisk();
+ ICING_ASSERT_OK(usage_store->PersistToDisk());
EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage)));
}
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index c6500db..7d8e0cb 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -29,6 +29,7 @@
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/portable/equals-proto.h"
#include "icing/proto/search.pb.h"
@@ -70,6 +71,39 @@ MATCHER_P2(EqualsDocHitInfo, document_id, section_ids, "") {
actual.hit_section_ids_mask() == section_mask;
}
+// Used to match a DocHitInfoIterator::CallStats
+MATCHER_P5(EqualsDocHitInfoIteratorCallStats, num_leaf_advance_calls_lite_index,
+ num_leaf_advance_calls_main_index,
+ num_leaf_advance_calls_integer_index,
+ num_leaf_advance_calls_no_index, num_blocks_inspected, "") {
+ const DocHitInfoIterator::CallStats& actual = arg;
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {num_leaf_advance_calls_lite_index=%d, "
+ "num_leaf_advance_calls_main_index=%d, "
+ "num_leaf_advance_calls_integer_index=%d, "
+ "num_leaf_advance_calls_no_index=%d, num_blocks_inspected=%d}, but "
+ "expected was {num_leaf_advance_calls_lite_index=%d, "
+ "num_leaf_advance_calls_main_index=%d, "
+ "num_leaf_advance_calls_integer_index=%d, "
+ "num_leaf_advance_calls_no_index=%d, num_blocks_inspected=%d}.)",
+ actual.num_leaf_advance_calls_lite_index,
+ actual.num_leaf_advance_calls_main_index,
+ actual.num_leaf_advance_calls_integer_index,
+ actual.num_leaf_advance_calls_no_index, actual.num_blocks_inspected,
+ num_leaf_advance_calls_lite_index, num_leaf_advance_calls_main_index,
+ num_leaf_advance_calls_integer_index, num_leaf_advance_calls_no_index,
+ num_blocks_inspected);
+ return actual.num_leaf_advance_calls_lite_index ==
+ num_leaf_advance_calls_lite_index &&
+ actual.num_leaf_advance_calls_main_index ==
+ num_leaf_advance_calls_main_index &&
+ actual.num_leaf_advance_calls_integer_index ==
+ num_leaf_advance_calls_integer_index &&
+ actual.num_leaf_advance_calls_no_index ==
+ num_leaf_advance_calls_no_index &&
+ actual.num_blocks_inspected == num_blocks_inspected;
+}
+
struct ExtractTermFrequenciesResult {
std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies = {0};
SectionIdMask section_mask = kSectionIdMaskNone;
diff --git a/icing/tokenization/combined-tokenizer_test.cc b/icing/tokenization/combined-tokenizer_test.cc
index 8314e91..0e400e2 100644
--- a/icing/tokenization/combined-tokenizer_test.cc
+++ b/icing/tokenization/combined-tokenizer_test.cc
@@ -178,7 +178,7 @@ TEST_F(CombinedTokenizerTest, ColonsPropertyRestricts) {
CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
lang_segmenter_.get()));
- if (IsIcu72PlusTokenization()) {
+ if (GetIcuTokenizationVersion() >= 72) {
// In ICU 72+ and above, ':' are no longer considered word connectors. The
// query tokenizer should still consider them to be property restricts.
constexpr std::string_view kText = "foo:bar";
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index 3bacbc6..a7f7419 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -296,12 +296,19 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) {
// 2. '@' became a word connector
// 3. <numeric><word-connector><numeric> such as "3'14" is now considered as
// a single token.
- if (IsIcu72PlusTokenization()) {
+ if (GetIcuTokenizationVersion() >= 72) {
EXPECT_THAT(
language_segmenter->GetAllTerms("com:google:android"),
IsOkAndHolds(ElementsAre("com", ":", "google", ":", "android")));
- EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
- IsOkAndHolds(ElementsAre("com@google@android")));
+ // In ICU 74, the rules for '@' were reverted.
+ if (GetIcuTokenizationVersion() >= 74) {
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android")));
+ } else {
+ EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com@google@android")));
+ }
EXPECT_THAT(language_segmenter->GetAllTerms("3'14"),
IsOkAndHolds(ElementsAre("3'14")));
} else {
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index a00f2f7..39cc0ed 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -349,7 +349,7 @@ TEST_F(RawQueryTokenizerTest, PropertyRestriction) {
// connector pre-ICU 72. For ICU 72 and above, it's no longer considered a
// connector.
// TODO(b/254874614): Handle colon word breaks in ICU 72+
- if (IsIcu72PlusTokenization()) {
+ if (GetIcuTokenizationVersion() >= 72) {
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property:foo:bar"),
IsOkAndHolds(ElementsAre(
EqualsToken(Token::Type::QUERY_PROPERTY, "property"),
diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc
index f32e541..58d4956 100644
--- a/icing/transform/icu/icu-normalizer.cc
+++ b/icing/transform/icu/icu-normalizer.cc
@@ -50,6 +50,7 @@ constexpr UChar kTransformRulesUtf16[] =
"Latin-ASCII; " // Map Latin characters to ASCII characters
"Hiragana-Katakana; " // Map hiragana to katakana
"[:Latin:] NFD; " // Decompose Latin letters
+ "[:Greek:] NFD; " // Decompose Greek letters
"[:Nonspacing Mark:] Remove; " // Remove accent / diacritic marks
"NFKC"; // Decompose and compose everything
diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h
index 7c64506..f6f2b78 100644
--- a/icing/transform/icu/icu-normalizer.h
+++ b/icing/transform/icu/icu-normalizer.h
@@ -33,7 +33,8 @@ namespace lib {
// 2. Transforms full-width Latin characters to ASCII characters if possible.
// 3. Transforms hiragana to katakana.
// 4. Removes accent / diacritic marks on Latin characters
-// 5. Normalized text must be less than or equal to max_term_byte_size,
+// 5. Removes accent / diacritic marks on Greek characters
+// 6. Normalized text must be less than or equal to max_term_byte_size,
// otherwise it will be truncated.
//
// There're some other rules from ICU not listed here, please see .cc file for
diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc
index fe8289a..89d5f1e 100644
--- a/icing/transform/icu/icu-normalizer_benchmark.cc
+++ b/icing/transform/icu/icu-normalizer_benchmark.cc
@@ -39,8 +39,8 @@
// blaze-bin/icing/transform/icu/icu-normalizer_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmark_filter=all
-// --adb
+// $ adb shell /data/local/tmp/icu-normalizer_benchmark
+// --benchmark_filter=all --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
// the benchmark will set up data files accordingly.
@@ -61,7 +61,6 @@ void BM_NormalizeUppercase(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string(state.range(0), 'A');
@@ -95,7 +94,6 @@ void BM_NormalizeAccent(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string;
@@ -123,7 +121,7 @@ BENCHMARK(BM_NormalizeAccent)
->Arg(2048000)
->Arg(4096000);
-void BM_NormalizeHiragana(benchmark::State& state) {
+void BM_NormalizeGreekAccent(benchmark::State& state) {
bool run_via_adb = absl::GetFlag(FLAGS_adb);
if (!run_via_adb) {
ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
@@ -133,7 +131,43 @@ void BM_NormalizeHiragana(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("άὰᾶἀἄ");
+ }
+
+ for (auto _ : state) {
+ normalizer->NormalizeTerm(input_string);
+ }
+}
+BENCHMARK(BM_NormalizeGreekAccent)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_NormalizeHiragana(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string;
@@ -171,7 +205,6 @@ void BM_UppercaseSubTokenLength(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string(state.range(0), 'A');
@@ -207,7 +240,6 @@ void BM_AccentSubTokenLength(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string;
@@ -248,7 +280,6 @@ void BM_HiraganaSubTokenLength(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string;
diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc
index 719f7be..0df23fc 100644
--- a/icing/transform/icu/icu-normalizer_test.cc
+++ b/icing/transform/icu/icu-normalizer_test.cc
@@ -83,14 +83,12 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) {
Eq("eeeeeeeeeeeeeeeeeeeeeeeeeee"));
EXPECT_THAT(normalizer_->NormalizeTerm("Ḟḟ"), Eq("ff"));
EXPECT_THAT(normalizer_->NormalizeTerm("ĜĞĠĢḠḡĝğġģ"), Eq("gggggggggg"));
- EXPECT_THAT(normalizer_->NormalizeTerm("ĤḢḤḦḨḪḣḥḧḩḫĥẖ"),
- Eq("hhhhhhhhhhhhh"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ĤḢḤḦḨḪḣḥḧḩḫĥẖ"), Eq("hhhhhhhhhhhhh"));
EXPECT_THAT(normalizer_->NormalizeTerm("ÌÍÎÏĨĪĬḬḭḯìíîïĩīĭ"),
Eq("iiiiiiiiiiiiiiiii"));
EXPECT_THAT(normalizer_->NormalizeTerm("Ĵĵ"), Eq("jj"));
EXPECT_THAT(normalizer_->NormalizeTerm("ĶḰḲḴḵḱḳķ"), Eq("kkkkkkkk"));
- EXPECT_THAT(normalizer_->NormalizeTerm("ĹĻĽḶḸḼḷḹḻḽĺļľ"),
- Eq("lllllllllllll"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ĹĻĽḶḸḼḷḹḻḽĺļľ"), Eq("lllllllllllll"));
EXPECT_THAT(normalizer_->NormalizeTerm("ḾṀṂḿṁṃ"), Eq("mmmmmm"));
EXPECT_THAT(normalizer_->NormalizeTerm("ÑŃŅŇṄṆṈṊṅṇṉṋñńņň"),
Eq("nnnnnnnnnnnnnnnn"));
@@ -109,19 +107,38 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) {
EXPECT_THAT(normalizer_->NormalizeTerm("ŴẀẂẄẆẈẁẃẅẇẉŵ"), Eq("wwwwwwwwwwww"));
EXPECT_THAT(normalizer_->NormalizeTerm("ẊẌẋẍ"), Eq("xxxx"));
EXPECT_THAT(normalizer_->NormalizeTerm("ÝŶŸẎẏŷýÿ"), Eq("yyyyyyyy"));
- EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"),
- Eq("zzzzzzzzzzzz"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"), Eq("zzzzzzzzzzzz"));
EXPECT_THAT(normalizer_->NormalizeTerm("Barış"), Eq("baris"));
}
+TEST_F(IcuNormalizerTest, GreekLetterRemoveAccent) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("kαλημέρα"), Eq("kαλημερα"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("εγγραφή"), Eq("εγγραφη"));
+ EXPECT_THAT(normalizer_->NormalizeTerm(
+ "ἈἉἊἋἌἍἎἏᾈᾉᾊᾋᾌᾍᾎᾏᾸᾹᾺΆᾼἀἁἂἃἄἅἆἇὰάᾀᾁᾂᾃᾄᾅᾆᾇᾰᾱᾲᾳᾴᾶᾷ"),
+ Eq("αααααααααααααααααααααααααααααααααααααααααααααα"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ἘἙἚἛἜἝῈΈἐἑἒἓἔἕὲέ"),
+ Eq("εεεεεεεεεεεεεεεε"));
+ EXPECT_THAT(
+ normalizer_->NormalizeTerm("ἨἩἪἫἬἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟῊΉῌἠἡἢἣἤἥἦἧὴήᾐᾑᾒᾓᾔᾕᾖᾗῂῃῄῆῇ"),
+ Eq("ηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηη"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ἸἹἺἻἼἽἾἿῘῙῚΊἰἱἲἳἴἵἶἷὶίῐῑῒΐῖῗ"),
+ Eq("ιιιιιιιιιιιιιιιιιιιιιιιιιιιι"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ὈὉὊὋὌὍῸΌὀὁὂὃὄὅὸό"),
+ Eq("οοοοοοοοοοοοοοοο"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ὙὛὝὟῨῩῪΎὐὑὒὓὔὕὖὗὺύῠῡῢΰῦῧ"),
+ Eq("υυυυυυυυυυυυυυυυυυυυυυυυ"));
+ EXPECT_THAT(
+ normalizer_->NormalizeTerm("ὨὩὪὫὬὭὮὯᾨᾩᾪᾫᾬᾭᾮᾯῺΏῼὠὡὢὣὤὥὦὧὼώᾠᾡᾢᾣᾤᾥᾦᾧῲῳῴῶῷ"),
+ Eq("ωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωω"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Ῥῤῥ"), Eq("ρρρ"));
+}
+
// Accent / diacritic marks won't be removed in non-latin chars, e.g. in
-// Japanese and Greek
+// Japanese
TEST_F(IcuNormalizerTest, NonLatinLetterNotRemoveAccent) {
// Katakana
EXPECT_THAT(normalizer_->NormalizeTerm("ダヂヅデド"), Eq("ダヂヅデド"));
- // Greek
- EXPECT_THAT(normalizer_->NormalizeTerm("kαλημέρα"), Eq("kαλημέρα"));
- EXPECT_THAT(normalizer_->NormalizeTerm("εγγραφή"), Eq("εγγραφή"));
// Our current ICU rules can't handle Hebrew properly, e.g. the accents in
// "אָלֶף־בֵּית עִבְרִי"
@@ -287,6 +304,27 @@ TEST_F(IcuNormalizerTest, PrefixMatchLength) {
term = "ÀĄḁáIcing";
match_end = normalizer->FindNormalizedMatchEndPosition(term, "aaaa");
EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ÀĄḁá"));
+
+ // Greek accents
+ term = "άνθρωπος";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ανθ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("άνθ"));
+
+ term = "καλημέρα";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "καλημε");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("καλημέ"));
+
+ term = "όχι";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "οχ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("όχ"));
+
+ term = "πότε";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ποτ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("πότ"));
+
+ term = "ἈἉἊἋIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "αααα");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ἈἉἊἋ"));
}
TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
@@ -340,6 +378,27 @@ TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
term = "BarışIcing";
match_end = normalizer->FindNormalizedMatchEndPosition(term, "barismdi");
EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
+
+ // Greek accents
+ term = "άνθρωπος";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ανθν");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("άνθ"));
+
+ term = "καλημέρα";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "καλημεος");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("καλημέ"));
+
+ term = "όχι";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "οχκα");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("όχ"));
+
+ term = "πότε";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ποτρα");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("πότ"));
+
+ term = "ἈἉἊἋIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ααααmdi");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ἈἉἊἋ"));
}
} // namespace
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index 9d5fea7..e0880ea 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -172,7 +172,7 @@ libtextclassifier3::Status DocumentValidator::Validate(
}
}
if (num_required_properties_actual <
- parsed_property_configs.num_required_properties) {
+ parsed_property_configs.required_properties.size()) {
return absl_ports::InvalidArgumentError(
absl_ports::StrCat("One or more required fields missing for key: (",
document.namespace_(), ", ", document.uri(), ")."));
diff --git a/icing/util/i18n-utils.cc b/icing/util/i18n-utils.cc
index ec327ad..ada9ef2 100644
--- a/icing/util/i18n-utils.cc
+++ b/icing/util/i18n-utils.cc
@@ -38,7 +38,7 @@ namespace {
// (https://www.fileformat.info/info/unicode/category/index.htm). The set of
// characters that are regarded as punctuation is not the same for std::ispunct
// and u_ispunct.
-const std::string ascii_icu_punctuation = "!\"#%&'*,./:;?@\\_-([{}])";
+constexpr std::string_view kAsciiIcuPunctuation = "!\"#%&'*,./:;?@\\_-([{}])";
} // namespace
@@ -129,7 +129,7 @@ bool IsPunctuationAt(std::string_view input, int position, int* char_len_out) {
if (char_len_out != nullptr) {
*char_len_out = 1;
}
- return ascii_icu_punctuation.find(input[position]) != std::string::npos;
+ return kAsciiIcuPunctuation.find(input[position]) != std::string_view::npos;
}
UChar32 c = GetUChar32At(input.data(), input.length(), position);
if (char_len_out != nullptr) {
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 79fcdb8..e73f16b 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -77,7 +77,7 @@ public class IcingSearchEngine implements IcingSearchEngineInterface {
icingSearchEngineImpl.close();
}
- @SuppressWarnings("deprecation")
+ @SuppressWarnings({"deprecation", "removal"}) // b/316643605
@Override
protected void finalize() throws Throwable {
icingSearchEngineImpl.close();
diff --git a/java/src/com/google/android/icing/IcingSearchEngineImpl.java b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
index 57744c4..3a00a5a 100644
--- a/java/src/com/google/android/icing/IcingSearchEngineImpl.java
+++ b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
@@ -71,7 +71,7 @@ public class IcingSearchEngineImpl implements Closeable {
closed = true;
}
- @SuppressWarnings("deprecation")
+ @SuppressWarnings({"deprecation", "removal"}) // b/316643605
@Override
protected void finalize() throws Throwable {
close();
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index 958767b..9dd9e88 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 14
+// Next tag: 16
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -127,6 +127,15 @@ message IcingSearchEngineOptions {
// indexing latency.
optional int32 lite_index_sort_size = 13 [default = 8192]; // 8 KiB
+ optional bool use_new_qualified_id_join_index = 14;
+
+ // Whether to build the metadata hits used for property existence check, which
+ // is required to support the hasProperty function in advanced query.
+ //
+ // TODO(b/309826655): Implement the feature flag derived files rebuild
+ // mechanism to handle index rebuild, instead of using index's magic value.
+ optional bool build_property_existence_metadata_hits = 15;
+
reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 418fc88..fcedeed 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -52,6 +52,9 @@ message InitializeStatsProto {
// The current code version is different from existing data version.
VERSION_CHANGED = 6;
+
+ // Any dependencies have changed.
+ DEPENDENCIES_CHANGED = 7;
}
// Possible recovery causes for document store:
@@ -117,7 +120,7 @@ message InitializeStatsProto {
}
// Stats of the top-level function IcingSearchEngine::Put().
-// Next tag: 11
+// Next tag: 12
message PutDocumentStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -138,12 +141,17 @@ message PutDocumentStatsProto {
// Number of tokens added to the index.
optional int32 num_tokens_indexed = 1;
+ // Number of metadata tokens added to the index, which can only be added by
+ // PropertyExistenceIndexingHandler currently.
+ optional int32 num_metadata_tokens_indexed = 3;
+
reserved 2;
}
optional TokenizationStats tokenization_stats = 6;
- // Time used to index all indexable string terms in the document. It does not
- // include the time to merge indices.
+ // Time used to index all indexable string terms and property existence
+ // metadata terms in the document. It does not include the time to merge
+ // indices or the time to sort the lite index.
optional int32 term_index_latency_ms = 7;
// Time used to index all indexable integers in the document.
@@ -152,26 +160,35 @@ message PutDocumentStatsProto {
// Time used to index all qualified id join strings in the document.
optional int32 qualified_id_join_index_latency_ms = 9;
- // Time used to sort and merge the LiteIndex's HitBuffer.
+ // Time used to sort the LiteIndex's HitBuffer.
optional int32 lite_index_sort_latency_ms = 10;
+
+ // Time used to index all metadata terms in the document, which can only be
+ // added by PropertyExistenceIndexingHandler currently.
+ optional int32 metadata_term_index_latency_ms = 11;
}
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
-// Next tag: 23
+// Next tag: 26
message QueryStatsProto {
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// The UTF-8 length of the query string
optional int32 query_length = 16;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Number of terms in the query string.
optional int32 num_terms = 1;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Number of namespaces filtered.
optional int32 num_namespaces_filtered = 2;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Number of schema types filtered.
optional int32 num_schema_types_filtered = 3;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Strategy of scoring and ranking.
optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 4;
@@ -186,6 +203,7 @@ message QueryStatsProto {
// The actual number of results returned in the current page.
optional int32 num_results_returned_current_page = 7;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Number of documents scored.
optional int32 num_documents_scored = 8;
@@ -195,10 +213,12 @@ message QueryStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 10;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Time used to parse the query, including 2 parts: tokenizing and
// transforming tokens into an iterator tree.
optional int32 parse_query_latency_ms = 11;
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
// Time used to score the raw results.
optional int32 scoring_latency_ms = 12;
@@ -228,6 +248,56 @@ message QueryStatsProto {
// Number of documents scored.
optional int32 num_joined_results_returned_current_page = 22;
+ // Whether it contains join query or not.
+ optional bool is_join_query = 23;
+
+ // Stats of the search. Only valid for first page.
+ // Next tag: 13
+ message SearchStats {
+ // The UTF-8 length of the query string
+ optional int32 query_length = 1;
+
+ // Number of terms in the query string.
+ optional int32 num_terms = 2;
+
+ // Number of namespaces filtered.
+ optional int32 num_namespaces_filtered = 3;
+
+ // Number of schema types filtered.
+ optional int32 num_schema_types_filtered = 4;
+
+ // Strategy of scoring and ranking.
+ optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 5;
+
+ // Number of documents scored.
+ optional int32 num_documents_scored = 6;
+
+ // Time used to parse the query, including 2 parts: tokenizing and
+ // transforming tokens into an iterator tree.
+ optional int32 parse_query_latency_ms = 7;
+
+ // Time used to score the raw results.
+ optional int32 scoring_latency_ms = 8;
+
+ // Whether it contains numeric query or not.
+ optional bool is_numeric_query = 9;
+
+ // Number of hits fetched by lite index before applying any filters.
+ optional int32 num_fetched_hits_lite_index = 10;
+
+ // Number of hits fetched by main index before applying any filters.
+ optional int32 num_fetched_hits_main_index = 11;
+
+ // Number of hits fetched by integer index before applying any filters.
+ optional int32 num_fetched_hits_integer_index = 12;
+ }
+
+ // Search stats for parent. Only valid for first page.
+ optional SearchStats parent_search_stats = 24;
+
+ // Search stats for child.
+ optional SearchStats child_search_stats = 25;
+
reserved 9;
}
diff --git a/proto/icing/proto/optimize.proto b/proto/icing/proto/optimize.proto
index 0accb9a..675f980 100644
--- a/proto/icing/proto/optimize.proto
+++ b/proto/icing/proto/optimize.proto
@@ -63,7 +63,7 @@ message GetOptimizeInfoResultProto {
optional int64 time_since_last_optimize_ms = 4;
}
-// Next tag: 11
+// Next tag: 13
message OptimizeStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -102,4 +102,10 @@ message OptimizeStatsProto {
FULL_INDEX_REBUILD = 1;
}
optional IndexRestorationMode index_restoration_mode = 10;
+
+ // Number of namespaces before the optimization.
+ optional int32 num_original_namespaces = 11;
+
+ // Number of namespaces deleted.
+ optional int32 num_deleted_namespaces = 12;
}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index bd3f395..dd08fd1 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=561560020)
+set(synced_AOSP_CL_number=587883838)