author     Terry Wang <tytytyww@google.com>  2023-03-01 11:35:11 -0800
committer  Terry Wang <tytytyww@google.com>  2023-03-01 13:00:38 -0800
commit     3f1a0c3d1aed4c5f9dc8b7581247dad85dec6683 (patch)
tree       a690d8be13e4f736ccf26f6c8d98b229471cc64a
parent     4aed4519bc1b1603565a0ee37e78ea082a6de595 (diff)
parent     e103b8ea56212b2a5abc082ce888843f19c7d567 (diff)
download   icing-3f1a0c3d1aed4c5f9dc8b7581247dad85dec6683.tar.gz
Merge remote-tracking branch 'goog/upstream-master' into androidx-platform-dev
* goog/upstream-master: Update Icing from upstream.

Descriptions:
- Add PropertyUtil for all property name/path related operations
- [JoinableCache][2.0/x] Create SchemaPropertyIterator
- [JoinableCache][2.1/x] Handle nested indexable flag
- [JoinableCache][2.2/x] Add schema cycle dependency detection for SchemaPropertyIterator
- [JoinableCache][3.0/x] Refactor SectionManager
- [JoinableCache][3.1/x] Add unit tests for SectionManager::Builder and SchemaTypeManager
- [NumericSearch][Storage][12/x] Implement Edit and GetIterator for IntegerIndex
- [NumericSearch][Storage][13.0/x] Rename numeric-index_test as integer-index_test
- [NumericSearch][Storage][13.1/x] Add IntegerIndexTest
- Support the "len", "sum" and "avg" functions in advanced scoring.
- Support the "this.childrenScores()" function to allow expressing children scores of joins in advanced scoring.
- Create an integration test for Join with advanced scoring
- Rename the word "children" to "args" for function related ScoreExpression
- Improve IndexBlock by PRead/PWrite instead of repeating mmap/msync/unmap
- Refactor QueryVisitor to prepare for support for function calls.
- Add support for function calls.
- Fix breakage in score-and-rank_benchmark.
- [NumericSearch][Storage][adhoc][ez] Fix comment for IntegerIndex
- [NumericSearch][Storage][14/x] Create first IntegerIndexStorage benchmark
- Rename Icing schema related terminology to prepare for polymorphism support
- [JoinableCache][4.0/x] Move common methods from SectionManager to PropertyUtil
- [JoinableCache][4.1/x] Retire GetSectionContent
- [JoinableCache][4.2/x] Polish SectionManagerTest
- Modify QueryVisitor to do:
- [NumericSearch][Storage][15/x] Implement TransferIndex for IntegerIndexStorage
- [NumericSearch][Storage][16/x] Implement Optimize and last added document id for IntegerIndex
- [NumericSearch][rollout][1/x] Include indexable int64 into SchemaDelta and backward compatibility
- Add backwards compatibility test for Icing schema storage migration.
- Implement trim the right-most node from the doc-hit-info-iterator.
- Add TrimmedNode structure into doc-hit-info-iterator.
- [JoinableCache][5/x] Implement JoinableProperty and JoinablePropertyManager
- [JoinableCache][6/x] Add JoinablePropertyManager into SchemaTypeManager
- [JoinableCache][7/x] Implement ExtractJoinableProperties
- [JoinableCache][8/x] Create class QualifiedIdTypeJoinableCache
- [JoinableCache][9/x] Implement factory method for QualifiedIdTypeJoinableCache
- [JoinableCache][10/x] Implement Get and Put for QualifiedIdTypeJoinableCache
- [JoinableCache][11/x] Add unit tests for QualifiedIdTypeJoinableCache
- Modify DocHitInfoIteratorSectionRestrict to allow multi-property restricts
- Fix the definition of LiteIndex::WantsMerge.
- [NumericSearch][rollout][2.0/x] Rollout persistent IntegerIndex
- [NumericSearch][rollout][2.1/x] Add more tests for integer index restoration and optimization
- [JoinableCache][adhoc][ez] Remove qualified id type joinable cache size info from document storage info
- Integrate trim right node into suggestion processor.

Bug: 208654892
Bug: 228240987
Bug: 249829533
Bug: 256081830
Bug: 259744228
Bug: 261474063
Bug: 263890397
Bug: 266103594
Bug: 268738297
Bug: 269295094
Change-Id: Id4b23bf397ca189c5050bfa6c5fd64e89138e321
-rw-r--r--  icing/absl_ports/str_join.h | 7
-rw-r--r--  icing/file/file-backed-vector.h | 44
-rw-r--r--  icing/file/file-backed-vector_test.cc | 99
-rw-r--r--  icing/file/persistent-storage.h | 2
-rw-r--r--  icing/file/posting_list/flash-index-storage.cc | 238
-rw-r--r--  icing/file/posting_list/flash-index-storage.h | 191
-rw-r--r--  icing/file/posting_list/index-block.cc | 322
-rw-r--r--  icing/file/posting_list/index-block.h | 344
-rw-r--r--  icing/file/posting_list/index-block_test.cc | 156
-rw-r--r--  icing/file/posting_list/posting-list-accessor.cc | 89
-rw-r--r--  icing/file/posting_list/posting-list-accessor.h | 35
-rw-r--r--  icing/file/posting_list/posting-list-identifier.h | 2
-rw-r--r--  icing/file/posting_list/posting-list-used.cc | 16
-rw-r--r--  icing/file/posting_list/posting-list-used.h | 61
-rw-r--r--  icing/icing-search-engine.cc | 435
-rw-r--r--  icing/icing-search-engine.h | 36
-rw-r--r--  icing/icing-search-engine_backwards_compatibility_test.cc | 178
-rw-r--r--  icing/icing-search-engine_benchmark.cc | 35
-rw-r--r--  icing/icing-search-engine_initialization_test.cc | 1854
-rw-r--r--  icing/icing-search-engine_optimize_test.cc | 208
-rw-r--r--  icing/icing-search-engine_schema_test.cc | 548
-rw-r--r--  icing/icing-search-engine_search_test.cc | 296
-rw-r--r--  icing/icing-search-engine_suggest_test.cc | 209
-rw-r--r--  icing/index/index-processor.cc | 8
-rw-r--r--  icing/index/index-processor.h | 16
-rw-r--r--  icing/index/index-processor_test.cc | 319
-rw-r--r--  icing/index/index.cc | 17
-rw-r--r--  icing/index/index.h | 12
-rw-r--r--  icing/index/index_test.cc | 409
-rw-r--r--  icing/index/integer-section-indexing-handler.cc | 21
-rw-r--r--  icing/index/integer-section-indexing-handler.h | 8
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-all-document-id.cc | 7
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-all-document-id.h | 2
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc | 11
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-and.cc | 34
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-and.h | 4
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-and_test.cc | 117
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-filter.cc | 12
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-filter.h | 2
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-filter_test.cc | 50
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-none.h | 52
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-not.cc | 7
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-not.h | 5
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-not_test.cc | 11
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-or.cc | 41
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-or.h | 4
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-or_test.cc | 41
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-section-restrict.cc | 44
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-section-restrict.h | 7
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc | 273
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-test-util.h | 16
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator.h | 37
-rw-r--r--  icing/index/lite/doc-hit-info-iterator-term-lite.cc | 9
-rw-r--r--  icing/index/lite/doc-hit-info-iterator-term-lite.h | 16
-rw-r--r--  icing/index/lite/lite-index.h | 11
-rw-r--r--  icing/index/lite/lite-index_test.cc | 3
-rw-r--r--  icing/index/main/doc-hit-info-iterator-term-main.cc | 8
-rw-r--r--  icing/index/main/doc-hit-info-iterator-term-main.h | 26
-rw-r--r--  icing/index/main/main-index.h | 2
-rw-r--r--  icing/index/main/main-index_test.cc | 57
-rw-r--r--  icing/index/main/posting-list-hit-accessor.cc | 53
-rw-r--r--  icing/index/main/posting-list-hit-accessor.h | 10
-rw-r--r--  icing/index/main/posting-list-hit-accessor_test.cc | 6
-rw-r--r--  icing/index/main/posting-list-hit-serializer_test.cc | 96
-rw-r--r--  icing/index/numeric/doc-hit-info-iterator-numeric.h | 12
-rw-r--r--  icing/index/numeric/dummy-numeric-index.h | 66
-rw-r--r--  icing/index/numeric/integer-index-storage.cc | 200
-rw-r--r--  icing/index/numeric/integer-index-storage.h | 57
-rw-r--r--  icing/index/numeric/integer-index-storage_benchmark.cc | 270
-rw-r--r--  icing/index/numeric/integer-index-storage_test.cc | 433
-rw-r--r--  icing/index/numeric/integer-index.cc | 140
-rw-r--r--  icing/index/numeric/integer-index.h | 119
-rw-r--r--  icing/index/numeric/integer-index_test.cc | 1189
-rw-r--r--  icing/index/numeric/numeric-index.h | 37
-rw-r--r--  icing/index/numeric/numeric-index_test.cc | 380
-rw-r--r--  icing/index/numeric/posting-list-integer-index-accessor.cc | 47
-rw-r--r--  icing/index/numeric/posting-list-integer-index-accessor.h | 7
-rw-r--r--  icing/index/numeric/posting-list-integer-index-accessor_test.cc | 6
-rw-r--r--  icing/index/numeric/posting-list-integer-index-serializer_test.cc | 56
-rw-r--r--  icing/index/section-indexing-handler.h | 15
-rw-r--r--  icing/index/string-section-indexing-handler.cc | 10
-rw-r--r--  icing/index/string-section-indexing-handler.h | 10
-rw-r--r--  icing/join/doc-join-info.cc | 48
-rw-r--r--  icing/join/doc-join-info.h | 66
-rw-r--r--  icing/join/doc-join-info_test.cc | 96
-rw-r--r--  icing/join/join-children-fetcher_test.cc | 1
-rw-r--r--  icing/join/qualified-id-type-joinable-cache.cc | 206
-rw-r--r--  icing/join/qualified-id-type-joinable-cache.h | 202
-rw-r--r--  icing/join/qualified-id-type-joinable-cache_test.cc | 496
-rw-r--r--  icing/query/advanced_query_parser/function.cc | 77
-rw-r--r--  icing/query/advanced_query_parser/function.h | 66
-rw-r--r--  icing/query/advanced_query_parser/function_test.cc | 308
-rw-r--r--  icing/query/advanced_query_parser/param.h | 57
-rw-r--r--  icing/query/advanced_query_parser/pending-value.cc | 39
-rw-r--r--  icing/query/advanced_query_parser/pending-value.h | 152
-rw-r--r--  icing/query/advanced_query_parser/query-visitor.cc | 402
-rw-r--r--  icing/query/advanced_query_parser/query-visitor.h | 147
-rw-r--r--  icing/query/advanced_query_parser/query-visitor_test.cc | 1272
-rw-r--r--  icing/query/query-processor.cc | 17
-rw-r--r--  icing/query/query-processor_test.cc | 2
-rw-r--r--  icing/query/suggestion-processor.cc | 270
-rw-r--r--  icing/query/suggestion-processor.h | 20
-rw-r--r--  icing/query/suggestion-processor_test.cc | 519
-rw-r--r--  icing/result/projection-tree.cc | 5
-rw-r--r--  icing/result/snippet-retriever.cc | 33
-rw-r--r--  icing/schema-builder.h | 7
-rw-r--r--  icing/schema/joinable-property-manager-builder_test.cc | 446
-rw-r--r--  icing/schema/joinable-property-manager.cc | 175
-rw-r--r--  icing/schema/joinable-property-manager.h | 136
-rw-r--r--  icing/schema/joinable-property-manager_test.cc | 430
-rw-r--r--  icing/schema/joinable-property.h | 132
-rw-r--r--  icing/schema/property-util.cc | 122
-rw-r--r--  icing/schema/property-util.h | 192
-rw-r--r--  icing/schema/property-util_test.cc | 236
-rw-r--r--  icing/schema/schema-property-iterator.cc | 80
-rw-r--r--  icing/schema/schema-property-iterator.h | 160
-rw-r--r--  icing/schema/schema-property-iterator_test.cc | 470
-rw-r--r--  icing/schema/schema-store.cc | 46
-rw-r--r--  icing/schema/schema-store.h | 31
-rw-r--r--  icing/schema/schema-store_test.cc | 4
-rw-r--r--  icing/schema/schema-type-manager.cc | 78
-rw-r--r--  icing/schema/schema-type-manager.h | 72
-rw-r--r--  icing/schema/schema-type-manager_test.cc | 352
-rw-r--r--  icing/schema/schema-util.cc | 190
-rw-r--r--  icing/schema/schema-util.h | 16
-rw-r--r--  icing/schema/schema-util_test.cc | 274
-rw-r--r--  icing/schema/section-manager-builder_test.cc | 329
-rw-r--r--  icing/schema/section-manager.cc | 309
-rw-r--r--  icing/schema/section-manager.h | 106
-rw-r--r--  icing/schema/section-manager_test.cc | 662
-rw-r--r--  icing/schema/section.h | 7
-rw-r--r--  icing/scoring/advanced_scoring/advanced-scorer_test.cc | 135
-rw-r--r--  icing/scoring/advanced_scoring/score-expression.cc | 149
-rw-r--r--  icing/scoring/advanced_scoring/score-expression.h | 62
-rw-r--r--  icing/scoring/advanced_scoring/score-expression_test.cc | 65
-rw-r--r--  icing/scoring/advanced_scoring/scoring-visitor.cc | 17
-rw-r--r--  icing/scoring/score-and-rank_benchmark.cc | 26
-rw-r--r--  icing/store/suggestion-result-checker-impl.h | 118
-rw-r--r--  icing/testing/common-matchers.h | 27
-rw-r--r--  icing/testing/numeric/number-generator.h | 39
-rw-r--r--  icing/testing/numeric/uniform-distribution-integer-generator.h | 39
-rw-r--r--  icing/tokenization/raw-query-tokenizer.cc | 46
-rw-r--r--  icing/util/encode-util.cc | 50
-rw-r--r--  icing/util/encode-util.h | 45
-rw-r--r--  icing/util/encode-util_test.cc | 91
-rw-r--r--  icing/util/snippet-helpers.cc | 61
-rw-r--r--  icing/util/snippet-helpers.h | 4
-rw-r--r--  proto/icing/proto/logging.proto | 8
-rw-r--r--  synced_AOSP_CL_number.txt | 2
149 files changed, 17088 insertions, 3802 deletions
diff --git a/icing/absl_ports/str_join.h b/icing/absl_ports/str_join.h
index f66a977..5277bca 100644
--- a/icing/absl_ports/str_join.h
+++ b/icing/absl_ports/str_join.h
@@ -93,6 +93,11 @@ std::string StrJoin(Iterator first, Iterator last, std::string_view sep,
return result;
}
+template <typename Iterator>
+std::string StrJoin(Iterator first, Iterator last, std::string_view sep) {
+ return absl_ports::StrJoin(first, last, sep, DefaultFormatter());
+}
+
template <typename Container, typename Formatter>
std::string StrJoin(const Container& container, std::string_view sep,
Formatter&& formatter) {
@@ -112,4 +117,4 @@ std::vector<std::string_view> StrSplit(std::string_view text,
} // namespace lib
} // namespace icing
-#endif // ICING_ABSL_PORTS_STR_JOIN_H_
+#endif // ICING_ABSL_PORTS_STR_JOIN_H_
\ No newline at end of file
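The new StrJoin overload above is a thin convenience wrapper that forwards to the existing formatter-based overload with DefaultFormatter. The following is a standalone sketch of the same pattern using only std:: types; it is illustrative and not the icing implementation.

// Standalone sketch of the default-formatter StrJoin overload pattern.
// Illustrative only; the real version lives in icing/absl_ports/str_join.h.
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

struct DefaultFormatter {
  // Appends the element to the output unchanged.
  void operator()(std::string* out, std::string_view value) const {
    out->append(value);
  }
};

template <typename Iterator, typename Formatter>
std::string StrJoin(Iterator first, Iterator last, std::string_view sep,
                    Formatter&& formatter) {
  std::string result;
  for (Iterator it = first; it != last; ++it) {
    if (it != first) result.append(sep);
    formatter(&result, *it);
  }
  return result;
}

// The convenience overload added in the diff: defaults the formatter.
template <typename Iterator>
std::string StrJoin(Iterator first, Iterator last, std::string_view sep) {
  return StrJoin(first, last, sep, DefaultFormatter());
}

int main() {
  std::vector<std::string> parts = {"foo", "bar", "baz"};
  std::cout << StrJoin(parts.begin(), parts.end(), ", ") << '\n';  // foo, bar, baz
  return 0;
}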
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 1d99e24..7408e8b 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -57,6 +57,7 @@
#define ICING_FILE_FILE_BACKED_VECTOR_H_
#include <sys/mman.h>
+#include <unistd.h>
#include <algorithm>
#include <cinttypes>
@@ -166,6 +167,13 @@ class FileBackedVector {
static constexpr int32_t kElementTypeSize = static_cast<int32_t>(sizeof(T));
static_assert(sizeof(T) <= (1 << 10));
+ // Absolute max # of elements allowed. Since we are using int32_t to store
+ // num_elements, max value is 2^31-1. Still the actual max # of elements are
+ // determined by max_file_size, kMaxFileSize, kElementTypeSize, and
+ // Header::kHeaderSize.
+ static constexpr int32_t kMaxNumElements =
+ std::numeric_limits<int32_t>::max();
+
// Creates a new FileBackedVector to read/write content to.
//
// filesystem: Object to make system level calls
@@ -350,6 +358,14 @@ class FileBackedVector {
// OUT_OF_RANGE_ERROR if len < 0 or len >= num_elements()
libtextclassifier3::Status TruncateTo(int32_t new_num_elements);
+ // Sorts the vector within range [begin_idx, end_idx).
+ // It handles SetDirty properly for the file-backed-vector.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if (0 <= begin_idx < end_idx <= num_elements()) does
+ // not hold
+ libtextclassifier3::Status Sort(int32_t begin_idx, int32_t end_idx);
+
// Mark idx as changed iff idx < changes_end_, so later ComputeChecksum() can
// update checksum by the cached changes without going over [0, changes_end_).
//
@@ -467,13 +483,6 @@ class FileBackedVector {
// Grow file by at least this many elements if array is growable.
static constexpr int64_t kGrowElements = 1u << 14; // 16K
- // Absolute max # of elements allowed. Since we are using int32_t to store
- // num_elements, max value is 2^31-1. Still the actual max # of elements are
- // determined by max_file_size, kMaxFileSize, kElementTypeSize, and
- // Header::kHeaderSize.
- static constexpr int32_t kMaxNumElements =
- std::numeric_limits<int32_t>::max();
-
// Absolute max index allowed.
static constexpr int32_t kMaxIndex = kMaxNumElements - 1;
@@ -532,13 +541,13 @@ template <typename T>
constexpr int32_t FileBackedVector<T>::kElementTypeSize;
template <typename T>
-constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
+constexpr int32_t FileBackedVector<T>::kMaxNumElements;
template <typename T>
-constexpr int64_t FileBackedVector<T>::kGrowElements;
+constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
template <typename T>
-constexpr int32_t FileBackedVector<T>::kMaxNumElements;
+constexpr int64_t FileBackedVector<T>::kGrowElements;
template <typename T>
constexpr int32_t FileBackedVector<T>::kMaxIndex;
@@ -951,6 +960,21 @@ libtextclassifier3::Status FileBackedVector<T>::TruncateTo(
}
template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Sort(int32_t begin_idx,
+ int32_t end_idx) {
+ if (begin_idx < 0 || begin_idx >= end_idx ||
+ end_idx > header_->num_elements) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid sort index, %d, %d", begin_idx, end_idx));
+ }
+ for (int32_t i = begin_idx; i < end_idx; ++i) {
+ SetDirty(i);
+ }
+ std::sort(mutable_array() + begin_idx, mutable_array() + end_idx);
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
void FileBackedVector<T>::SetDirty(int32_t idx) {
// Cache original value to update crcs.
if (idx >= 0 && idx < changes_end_) {
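The Sort routine added above follows a two-step pattern: mark every index in [begin_idx, end_idx) dirty so the incremental checksum machinery knows those elements changed, then run std::sort directly on the mapped array. Below is a minimal standalone sketch of that idea; SimpleDirtyVector and its members are purely illustrative stand-ins, not the file-backed vector itself.

// Illustrative sketch of the "mark dirty, then sort the range" pattern used by
// FileBackedVector<T>::Sort above. Not the icing implementation.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <set>
#include <stdexcept>
#include <vector>

class SimpleDirtyVector {
 public:
  explicit SimpleDirtyVector(std::vector<int> data) : data_(std::move(data)) {}

  // Sorts [begin_idx, end_idx) and records every touched index as dirty, so a
  // later checksum pass only needs to revisit the changed elements.
  void Sort(int32_t begin_idx, int32_t end_idx) {
    if (begin_idx < 0 || begin_idx >= end_idx ||
        end_idx > static_cast<int32_t>(data_.size())) {
      throw std::out_of_range("Invalid sort range");
    }
    for (int32_t i = begin_idx; i < end_idx; ++i) {
      dirty_.insert(i);  // analogous to SetDirty(i)
    }
    std::sort(data_.begin() + begin_idx, data_.begin() + end_idx);
  }

  const std::vector<int>& data() const { return data_; }
  const std::set<int32_t>& dirty() const { return dirty_; }

 private:
  std::vector<int> data_;
  std::set<int32_t> dirty_;
};

int main() {
  SimpleDirtyVector v({5, 4, 2, 3, 1});
  v.Sort(/*begin_idx=*/1, /*end_idx=*/4);
  for (int x : v.data()) std::cout << x << ' ';  // 5 2 3 4 1
  std::cout << "\ndirty indices: " << v.dirty().size() << '\n';  // 3
  return 0;
}

The test file diffed below exercises exactly this behavior from the caller's side.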
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index c526dec..524bbc1 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -1019,6 +1019,105 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
}
}
+TEST_F(FileBackedVectorTest, Sort) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+
+ // Sort vector range [1, 4) (excluding 4).
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
+ // Verify sorted range should be sorted and others should remain unchanged.
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(5)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(1)));
+
+ // Sort again by end_idx = num_elements().
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/0, /*end_idx=*/vector->num_elements()),
+ IsOk());
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(1)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(5)));
+}
+
+TEST_F(FileBackedVectorTest, SortByInvalidIndexShouldReturnOutOfRangeError) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/-1, /*end_idx=*/4),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/0, /*end_idx=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/3),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/5, /*end_idx=*/5),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/6),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedVectorTest, SortShouldSetDirtyCorrectly) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+ } // Destroying the vector should trigger a checksum of the 5 elements
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Sort vector range [1, 4) (excluding 4).
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
+ } // Destroying the vector should update the checksum
+
+ // Creating again should check that the checksum after sorting matches what
+ // was previously saved. This tests the correctness of SetDirty() for sorted
+ // elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Verify sorted range should be sorted and others should remain unchanged.
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(5)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(1)));
+}
+
TEST_F(FileBackedVectorTest, SetDirty) {
// 1. Create a vector and add some data.
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h
index a70c9e9..727cae9 100644
--- a/icing/file/persistent-storage.h
+++ b/icing/file/persistent-storage.h
@@ -268,7 +268,7 @@ class PersistentStorage {
virtual Crcs& crcs() = 0;
virtual const Crcs& crcs() const = 0;
- const Filesystem& filesystem_;
+ const Filesystem& filesystem_; // Does not own
// Path to the storage. It can be a single file path or a directory path
// depending on the implementation of the derived class.
//
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
index e785d87..657bd96 100644
--- a/icing/file/posting_list/flash-index-storage.cc
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -15,12 +15,16 @@
#include "icing/file/posting_list/flash-index-storage.h"
#include <sys/types.h>
+#include <unistd.h>
#include <algorithm>
#include <cerrno>
+#include <cinttypes>
#include <cstdint>
#include <memory>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/posting_list/index-block.h"
@@ -50,10 +54,13 @@ uint32_t SelectBlockSize() {
} // namespace
libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
- const std::string& index_filename, const Filesystem* filesystem,
+ std::string index_filename, const Filesystem* filesystem,
PostingListSerializer* serializer, bool in_memory) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
- FlashIndexStorage storage(index_filename, filesystem, serializer, in_memory);
+ ICING_RETURN_ERROR_IF_NULL(serializer);
+
+ FlashIndexStorage storage(filesystem, std::move(index_filename), serializer,
+ in_memory);
if (!storage.Init()) {
return absl_ports::InternalError(
"Unable to successfully read header block!");
@@ -61,16 +68,6 @@ libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
return storage;
}
-FlashIndexStorage::FlashIndexStorage(const std::string& index_filename,
- const Filesystem* filesystem,
- PostingListSerializer* serializer,
- bool has_in_memory_freelists)
- : index_filename_(index_filename),
- num_blocks_(0),
- filesystem_(filesystem),
- serializer_(serializer),
- has_in_memory_freelists_(has_in_memory_freelists) {}
-
FlashIndexStorage::~FlashIndexStorage() {
if (header_block_ != nullptr) {
FlushInMemoryFreeList();
@@ -79,8 +76,8 @@ FlashIndexStorage::~FlashIndexStorage() {
}
bool FlashIndexStorage::Init() {
- block_fd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
- if (!block_fd_.is_valid()) {
+ storage_sfd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
+ if (!storage_sfd_.is_valid()) {
return false;
}
@@ -90,7 +87,7 @@ bool FlashIndexStorage::Init() {
bool FlashIndexStorage::InitHeader() {
// Look for an existing file size.
- int64_t file_size = filesystem_->GetFileSize(block_fd_.get());
+ int64_t file_size = filesystem_->GetFileSize(storage_sfd_.get());
if (file_size == Filesystem::kBadFileSize) {
ICING_LOG(ERROR) << "Could not initialize main index. Bad file size.";
return false;
@@ -125,8 +122,7 @@ bool FlashIndexStorage::CreateHeader() {
// Work down from the largest posting list that fits in
// block_size. We don't care about locality of blocks because this
// is a flash index.
- for (uint32_t posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- block_size, serializer_->GetDataTypeBytes());
+ for (uint32_t posting_list_bytes = max_posting_list_bytes();
posting_list_bytes >= serializer_->GetMinPostingListSize();
posting_list_bytes /= 2) {
uint32_t aligned_posting_list_bytes =
@@ -149,8 +145,8 @@ bool FlashIndexStorage::CreateHeader() {
}
// Write the header.
- if (!header_block_->Write(block_fd_.get())) {
- filesystem_->Truncate(block_fd_.get(), 0);
+ if (!header_block_->Write(storage_sfd_.get())) {
+ filesystem_->Truncate(storage_sfd_.get(), 0);
return false;
}
num_blocks_ = 1;
@@ -162,7 +158,7 @@ bool FlashIndexStorage::OpenHeader(int64_t file_size) {
// Read and validate header.
ICING_ASSIGN_OR_RETURN(
HeaderBlock read_header,
- HeaderBlock::Read(filesystem_, block_fd_.get(), block_size), false);
+ HeaderBlock::Read(filesystem_, storage_sfd_.get(), block_size), false);
if (read_header.header()->magic != HeaderBlock::Header::kMagic) {
ICING_LOG(ERROR) << "Index header block wrong magic";
return false;
@@ -200,7 +196,7 @@ bool FlashIndexStorage::OpenHeader(int64_t file_size) {
<< block_size << "). Defaulting to existing block size "
<< read_header.header()->block_size;
ICING_ASSIGN_OR_RETURN(HeaderBlock read_header,
- HeaderBlock::Read(filesystem_, block_fd_.get(),
+ HeaderBlock::Read(filesystem_, storage_sfd_.get(),
read_header.header()->block_size),
false);
}
@@ -226,20 +222,20 @@ bool FlashIndexStorage::OpenHeader(int64_t file_size) {
bool FlashIndexStorage::PersistToDisk() {
// First, write header.
- if (!header_block_->Write(block_fd_.get())) {
+ if (!header_block_->Write(storage_sfd_.get())) {
ICING_LOG(ERROR) << "Write index header failed: " << strerror(errno);
return false;
}
// Then sync.
- return filesystem_->DataSync(block_fd_.get());
+ return filesystem_->DataSync(storage_sfd_.get());
}
libtextclassifier3::Status FlashIndexStorage::Reset() {
// Reset in-memory members to default values.
num_blocks_ = 0;
header_block_.reset();
- block_fd_.reset();
+ storage_sfd_.reset();
in_memory_freelists_.clear();
// Delete the underlying file.
@@ -260,36 +256,36 @@ libtextclassifier3::StatusOr<PostingListHolder>
FlashIndexStorage::GetPostingList(PostingListIdentifier id) const {
ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(id.block_index()));
ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
+ IndexBlock::PostingListAndBlockInfo pl_block_info,
block.GetAllocatedPostingList(id.posting_list_index()));
- PostingListHolder holder = {std::move(posting_list), std::move(block), id};
- return holder;
+ return PostingListHolder(std::move(pl_block_info.posting_list_used), id,
+ pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::GetIndexBlock(
- int block_index) const {
+ uint32_t block_index) const {
if (block_index >= num_blocks_) {
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Unable to create an index block at index %d when only %d blocks have "
- "been allocated.",
+ "Unable to create an index block at index %" PRIu32
+ " when only %d blocks have been allocated.",
block_index, num_blocks_));
}
off_t offset = static_cast<off_t>(block_index) * block_size();
return IndexBlock::CreateFromPreexistingIndexBlockRegion(
- *filesystem_, index_filename_, serializer_, offset, block_size());
+ filesystem_, serializer_, storage_sfd_.get(), offset, block_size());
}
libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::CreateIndexBlock(
- int block_index, uint32_t posting_list_size) const {
+ uint32_t block_index, uint32_t posting_list_size) const {
if (block_index >= num_blocks_) {
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Unable to create an index block at index %d when only %d blocks have "
- "been allocated.",
+ "Unable to create an index block at index %" PRIu32
+ " when only %d blocks have been allocated.",
block_index, num_blocks_));
}
off_t offset = static_cast<off_t>(block_index) * block_size();
return IndexBlock::CreateFromUninitializedRegion(
- *filesystem_, index_filename_, serializer_, offset, block_size(),
+ filesystem_, serializer_, storage_sfd_.get(), offset, block_size(),
posting_list_size);
}
@@ -315,21 +311,19 @@ FlashIndexStorage::GetPostingListFromInMemoryFreeList(int block_info_index) {
// need to free this posting list.
ICING_ASSIGN_OR_RETURN(IndexBlock block,
GetIndexBlock(posting_list_id.block_index()));
- block.FreePostingList(posting_list_id.posting_list_index());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(posting_list_id.posting_list_index()));
// Now, we can allocate a posting list from the same index block. It may not
// be the same posting list that was just freed, but that's okay.
- ICING_ASSIGN_OR_RETURN(PostingListIndex posting_list_index,
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
block.AllocatePostingList());
- posting_list_id =
- PostingListIdentifier(posting_list_id.block_index(), posting_list_index,
- posting_list_id.posting_list_index_bits());
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
- block.GetAllocatedPostingList(posting_list_id.posting_list_index()));
- PostingListHolder holder = {std::move(posting_list), std::move(block),
- posting_list_id};
- return holder;
+ posting_list_id = PostingListIdentifier(
+ posting_list_id.block_index(), pl_block_info.posting_list_index,
+ posting_list_id.posting_list_index_bits());
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<PostingListHolder>
@@ -344,19 +338,18 @@ FlashIndexStorage::GetPostingListFromOnDiskFreeList(int block_info_index) {
// Get the index block
ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(block_index));
- ICING_ASSIGN_OR_RETURN(PostingListIndex posting_list_index,
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
block.AllocatePostingList());
- PostingListIdentifier posting_list_id = PostingListIdentifier(
- block_index, posting_list_index, block.posting_list_index_bits());
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
- block.GetAllocatedPostingList(posting_list_id.posting_list_index()));
- if (!block.has_free_posting_lists()) {
- RemoveFromOnDiskFreeList(block_index, block_info_index, &block);
+ PostingListIdentifier posting_list_id =
+ PostingListIdentifier(block_index, pl_block_info.posting_list_index,
+ block.posting_list_index_bits());
+ if (!pl_block_info.has_free_posting_lists) {
+ ICING_RETURN_IF_ERROR(
+ RemoveFromOnDiskFreeList(block_index, block_info_index, &block));
}
- PostingListHolder holder = {std::move(posting_list), std::move(block),
- posting_list_id};
- return holder;
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<PostingListHolder>
@@ -371,29 +364,26 @@ FlashIndexStorage::AllocateNewPostingList(int block_info_index) {
CreateIndexBlock(block_index, header_block_->header()
->index_block_infos[block_info_index]
.posting_list_bytes));
- ICING_ASSIGN_OR_RETURN(PostingListIndex posting_list_index,
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
block.AllocatePostingList());
- PostingListIdentifier posting_list_id = PostingListIdentifier(
- block_index, posting_list_index, block.posting_list_index_bits());
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list,
- block.GetAllocatedPostingList(posting_list_id.posting_list_index()));
- if (block.has_free_posting_lists()) {
+ PostingListIdentifier posting_list_id =
+ PostingListIdentifier(block_index, pl_block_info.posting_list_index,
+ block.posting_list_index_bits());
+ if (pl_block_info.has_free_posting_lists) {
AddToOnDiskFreeList(block_index, block_info_index, &block);
}
- PostingListHolder holder = {std::move(posting_list), std::move(block),
- posting_list_id};
- return holder;
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
}
libtextclassifier3::StatusOr<PostingListHolder>
FlashIndexStorage::AllocatePostingList(uint32_t min_posting_list_bytes) {
- int max_block_size = IndexBlock::CalculateMaxPostingListBytes(
- block_size(), serializer_->GetDataTypeBytes());
- if (min_posting_list_bytes > max_block_size) {
+ int max_pl_size = max_posting_list_bytes();
+ if (min_posting_list_bytes > max_pl_size) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Requested posting list size %d exceeds max posting list size %d",
- min_posting_list_bytes, max_block_size));
+ min_posting_list_bytes, max_pl_size));
}
int best_block_info_index = FindBestIndexBlockInfo(min_posting_list_bytes);
@@ -411,29 +401,77 @@ FlashIndexStorage::AllocatePostingList(uint32_t min_posting_list_bytes) {
return AllocateNewPostingList(best_block_info_index);
}
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::AllocateAndChainMaxSizePostingList(
+ uint32_t prev_block_index) {
+ uint32_t max_pl_size = max_posting_list_bytes();
+ int best_block_info_index = FindBestIndexBlockInfo(max_pl_size);
+
+ auto holder_or = GetPostingListFromInMemoryFreeList(best_block_info_index);
+ if (!holder_or.ok()) {
+ // Nothing in memory. Look for something in the block file.
+ holder_or = GetPostingListFromOnDiskFreeList(best_block_info_index);
+ }
+
+ if (!holder_or.ok()) {
+ // Nothing in memory or block file. Allocate new block and posting list.
+ holder_or = AllocateNewPostingList(best_block_info_index);
+ }
+
+ if (!holder_or.ok()) {
+ return holder_or;
+ }
+
+ PostingListHolder holder = std::move(holder_or).ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(prev_block_index));
+ holder.next_block_index = prev_block_index;
+ return holder;
+}
+
void FlashIndexStorage::AddToOnDiskFreeList(uint32_t block_index,
int block_info_index,
IndexBlock* index_block) {
- index_block->set_next_block_index(header_block_->header()
- ->index_block_infos[block_info_index]
- .free_list_block_index);
+ libtextclassifier3::Status status =
+ index_block->SetNextBlockIndex(header_block_->header()
+ ->index_block_infos[block_info_index]
+ .free_list_block_index);
+ if (!status.ok()) {
+ // If an error occurs, then simply skip this block. It just prevents us from
+ // allocating posting lists from this free block in the future and thus
+ // wastes at most one block, but the entire storage (including the
+ // FlashIndexStorage header) is still valid. Therefore, we can swallow
+ // errors here.
+ ICING_VLOG(1) << "Fail to set next block index to chain blocks with free "
+ "lists on disk: "
+ << status.error_message();
+ return;
+ }
+
header_block_->header()
->index_block_infos[block_info_index]
.free_list_block_index = block_index;
}
-void FlashIndexStorage::RemoveFromOnDiskFreeList(uint32_t block_index,
- int block_info_index,
- IndexBlock* index_block) {
+libtextclassifier3::Status FlashIndexStorage::RemoveFromOnDiskFreeList(
+ uint32_t block_index, int block_info_index, IndexBlock* index_block) {
// Cannot be used anymore. Move free ptr to the next block.
+ ICING_ASSIGN_OR_RETURN(uint32_t next_block_index,
+ index_block->GetNextBlockIndex());
+ ICING_RETURN_IF_ERROR(index_block->SetNextBlockIndex(kInvalidBlockIndex));
header_block_->header()
->index_block_infos[block_info_index]
- .free_list_block_index = index_block->next_block_index();
- index_block->set_next_block_index(kInvalidBlockIndex);
+ .free_list_block_index = next_block_index;
+ return libtextclassifier3::Status::OK;
}
-void FlashIndexStorage::FreePostingList(PostingListHolder holder) {
- uint32_t posting_list_bytes = holder.block.get_posting_list_bytes();
+libtextclassifier3::Status FlashIndexStorage::FreePostingList(
+ PostingListHolder&& holder) {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+
+ uint32_t posting_list_bytes = block.posting_list_bytes();
int best_block_info_index = FindBestIndexBlockInfo(posting_list_bytes);
// It *should* be guaranteed elsewhere that FindBestIndexBlockInfo will not
@@ -443,14 +481,24 @@ void FlashIndexStorage::FreePostingList(PostingListHolder holder) {
best_block_info_index < in_memory_freelists_.size()) {
in_memory_freelists_[best_block_info_index].Push(holder.id);
} else {
- bool was_full = !holder.block.has_free_posting_lists();
- holder.block.FreePostingList(holder.id.posting_list_index());
+ ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(holder.id.posting_list_index()));
// If this block was not already full, then it is already in the free list.
- if (was_full) {
+ if (!was_not_full) {
AddToOnDiskFreeList(holder.id.block_index(), best_block_info_index,
- &holder.block);
+ &block);
}
}
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FlashIndexStorage::WritePostingListToDisk(
+ const PostingListHolder& holder) {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ return block.WritePostingListToDisk(holder.posting_list,
+ holder.id.posting_list_index());
}
int FlashIndexStorage::GrowIndex() {
@@ -461,7 +509,7 @@ int FlashIndexStorage::GrowIndex() {
// Grow the index file.
if (!filesystem_->Grow(
- block_fd_.get(),
+ storage_sfd_.get(),
static_cast<uint64_t>(num_blocks_ + 1) * block_size())) {
ICING_VLOG(1) << "Error growing index file: " << strerror(errno);
return kInvalidBlockIndex;
@@ -470,7 +518,7 @@ int FlashIndexStorage::GrowIndex() {
return num_blocks_++;
}
-void FlashIndexStorage::FlushInMemoryFreeList() {
+libtextclassifier3::Status FlashIndexStorage::FlushInMemoryFreeList() {
for (int i = 0; i < in_memory_freelists_.size(); ++i) {
FreeList& freelist = in_memory_freelists_.at(i);
auto freelist_elt_or = freelist.TryPop();
@@ -487,17 +535,19 @@ void FlashIndexStorage::FlushInMemoryFreeList() {
continue;
}
IndexBlock block = std::move(block_or).ValueOrDie();
- bool was_full = !block.has_free_posting_lists();
- block.FreePostingList(freelist_elt.posting_list_index());
+ ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(freelist_elt.posting_list_index()));
// If this block was not already full, then it is already in the free
// list.
- if (was_full) {
+ if (!was_not_full) {
AddToOnDiskFreeList(freelist_elt.block_index(), /*block_info_index=*/i,
&block);
}
freelist_elt_or = freelist.TryPop();
}
}
+ return libtextclassifier3::Status::OK;
}
void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
@@ -518,10 +568,12 @@ void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
IcingStringUtil::SStringAppendF(out, 100, "%u ", block_index);
++count;
+ block_index = kInvalidBlockIndex;
if (block_or.ok()) {
- block_index = block_or.ValueOrDie().next_block_index();
- } else {
- block_index = kInvalidBlockIndex;
+ auto block_index_or = block_or.ValueOrDie().GetNextBlockIndex();
+ if (block_index_or.ok()) {
+ block_index = block_index_or.ValueOrDie();
+ }
}
}
IcingStringUtil::SStringAppendF(out, 100, "(count=%d)\n", count);
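For reference, the on-disk free lists maintained by AddToOnDiskFreeList/RemoveFromOnDiskFreeList are singly linked chains of blocks joined through each block header's next_block_index field, which GetDebugInfo now walks above via the status-returning GetNextBlockIndex(). The sketch below shows that chain walk in isolation; Block, kInvalidBlockIndex, and CountChain are example stand-ins, not icing types, and index 0 is used here as the invalid sentinel.

// Illustrative sketch of walking a free-list chain of blocks linked through a
// next_block_index field, as GetDebugInfo does above.
#include <cstdint>
#include <iostream>
#include <vector>

constexpr uint32_t kInvalidBlockIndex = 0;

struct Block {
  uint32_t next_block_index = kInvalidBlockIndex;
};

// Counts the blocks reachable from head_index by following next_block_index
// until the invalid sentinel (or an out-of-range index) is hit.
int CountChain(const std::vector<Block>& blocks, uint32_t head_index) {
  int count = 0;
  uint32_t block_index = head_index;
  while (block_index != kInvalidBlockIndex && block_index < blocks.size()) {
    ++count;
    block_index = blocks[block_index].next_block_index;
  }
  return count;
}

int main() {
  std::vector<Block> blocks(4);
  blocks[3].next_block_index = 2;  // chain: 3 -> 2 -> (end)
  std::cout << "blocks in chain: " << CountChain(blocks, 3) << '\n';  // 2
  return 0;
}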
diff --git a/icing/file/posting_list/flash-index-storage.h b/icing/file/posting_list/flash-index-storage.h
index 6c069ba..1813637 100644
--- a/icing/file/posting_list/flash-index-storage.h
+++ b/icing/file/posting_list/flash-index-storage.h
@@ -20,37 +20,47 @@
#include <string>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/flash-index-storage-header.h"
#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
-#include "icing/legacy/core/icing-packed-pod.h"
#include "icing/proto/debug.pb.h"
#include "icing/store/document-id.h"
namespace icing {
namespace lib {
-// The PostingListHolder struct exists to group together related PostingListUsed
-// IndexBlock pairs and their ids.
+// PostingListHolder: group PostingListUsed, id, and some other useful info for
+// callers.
struct PostingListHolder {
- // PostingListUseds interpret data that they themselves do NOT own. The data
- // being interpreted is stored on a flash block and its memory mapping is
- // owned by the IndexBlock. As such, the lifecycle of the PostingListUsed must
- // NOT exceed the lifecycle of the IndexBlock.
+ // PostingListUsed owns an in-memory posting list data buffer. The data being
+ // interpreted is initialized via PRead from the storage. As such, we should
+ // sync it to disk after modifying it.
PostingListUsed posting_list;
- IndexBlock block;
- // The PostingListIdentifier, which identifies both the IndexBlock and the
- // PostingListUsed, is also returned for convenience.
+
+ // The PostingListIdentifier, which identifies both the block index and the
+ // posting list index on that block, is also returned for convenience.
PostingListIdentifier id;
+
+ // Next block index is also returned for convenience. If PostingListUsed is a
+ // max-sized posting list, then the caller has to use this value to handle
+ // chained max-sized posting list blocks.
+ uint32_t next_block_index;
+
+ explicit PostingListHolder(PostingListUsed&& posting_list_in,
+ PostingListIdentifier id_in,
+ uint32_t next_block_index_in)
+ : posting_list(std::move(posting_list_in)),
+ id(id_in),
+ next_block_index(next_block_index_in) {}
};
-// The FlashIndexStorage class manages the actual file that makes up the index.
-// It allocates IndexBlocks as needed and maintains freelists to prevent
-// excessive block fragmentation.
+// The FlashIndexStorage class manages the actual file that makes up blocks for
+// posting lists. It allocates IndexBlocks as needed and maintains freelists to
+// prevent excessive block fragmentation.
//
// It maintains two types of free lists:
// 1. On-disk, Header free list - This free list is stored in the Header
@@ -81,22 +91,28 @@ class FlashIndexStorage {
//
// RETURNS:
// - On success, a valid instance of FlashIndexStorage
- // - INTERNAL error if unable to create a new header or read the existing
+ // - FAILED_PRECONDITION_ERROR if filesystem or serializer is null
+ // - INTERNAL_ERROR if unable to create a new header or read the existing
// one from disk.
static libtextclassifier3::StatusOr<FlashIndexStorage> Create(
- const std::string& index_filename, const Filesystem* filesystem,
+ std::string index_filename, const Filesystem* filesystem,
PostingListSerializer* serializer, bool in_memory = true);
- // Retrieve the PostingList referred to by PostingListIdentifier. This posting
- // list must have been previously allocated by a prior call to
+ FlashIndexStorage(FlashIndexStorage&&) = default;
+ FlashIndexStorage(const FlashIndexStorage&) = delete;
+ FlashIndexStorage& operator=(FlashIndexStorage&&) = default;
+ FlashIndexStorage& operator=(const FlashIndexStorage&) = delete;
+
+ ~FlashIndexStorage();
+
+ // Retrieves the PostingList referred to by PostingListIdentifier. This
+ // posting list must have been previously allocated by a prior call to
// AllocatePostingList.
//
// RETURNS:
// - On success, a valid instance of PostingListHolder containing the
// requested PostingListUsed.
- // - INVALID_ARGUMENT if id.posting_list_index() is out of bounds in the
- // IndexBlock referred to by id.block_index()
- // - INTERNAL_ERROR if unable to access the region in file.
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder> GetPostingList(
PostingListIdentifier id) const;
@@ -106,19 +122,51 @@ class FlashIndexStorage {
// RETURNS:
// - On success, a valid instance of PostingListHolder containing the
// requested PostingListUsed.
- // - RESOURCE_EXHAUSTED error if unable to grow the index to create a
+ // - INVALID_ARGUMENT_ERROR if min_posting_list_bytes >
+ // max_posting_list_bytes()
+ // - RESOURCE_EXHAUSTED_ERROR if unable to grow the index to create a
// PostingListUsed of the requested size.
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder> AllocatePostingList(
uint32_t min_posting_list_bytes);
- ~FlashIndexStorage();
- FlashIndexStorage(FlashIndexStorage&&) = default;
- FlashIndexStorage(const FlashIndexStorage&) = delete;
- FlashIndexStorage& operator=(FlashIndexStorage&&) = default;
- FlashIndexStorage& operator=(const FlashIndexStorage&) = delete;
+ // Allocates a new IndexBlock with a single max-sized PostingListUsed. This
+ // chains index blocks by setting the next_block_index field of this new
+ // block's header to be prev_block_index and returns a PostingListHolder
+ // containing a max-sized PostingListUsed.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListHolder containing the
+ // requested PostingListUsed.
+ // - RESOURCE_EXHAUSTED_ERROR if unable to grow the index to create a
+ // PostingListUsed of max size
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder>
+ AllocateAndChainMaxSizePostingList(uint32_t prev_block_index);
- // Free the PostingListUsed that this holder holds.
- void FreePostingList(PostingListHolder holder);
+ // Frees the PostingListUsed that this holder holds.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status FreePostingList(PostingListHolder&& holder);
+
+ // Writes back the PostingListUsed that this holder holds to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status WritePostingListToDisk(
+ const PostingListHolder& holder);
+
+ // Discards all existing data by deleting the existing file and
+ // re-initializing a new one.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to delete existing files or initialize a new
+ // file with header
+ libtextclassifier3::Status Reset();
// Used to track the largest docid indexed in the index.
DocumentId get_last_indexed_docid() const {
@@ -134,7 +182,7 @@ class FlashIndexStorage {
// Returns the size of the index file in bytes.
int64_t GetDiskUsage() const {
- return filesystem_->GetDiskUsage(block_fd_.get());
+ return filesystem_->GetDiskUsage(storage_sfd_.get());
}
// Returns the size of the index file used to contains data.
@@ -145,6 +193,12 @@ class FlashIndexStorage {
int num_blocks() const { return num_blocks_; }
+ // Gets the byte size of max sized posting list.
+ uint32_t max_posting_list_bytes() const {
+ return IndexBlock::CalculateMaxPostingListBytes(
+ block_size(), serializer_->GetDataTypeBytes());
+ }
+
// Info about the index based on the block size.
int block_size() const { return header_block_->header()->block_size; }
@@ -161,16 +215,19 @@ class FlashIndexStorage {
const PostingListSerializer* serializer() const { return serializer_; }
PostingListSerializer* serializer() { return serializer_; }
- libtextclassifier3::Status Reset();
-
// TODO(b/222349894) Convert the string output to a protocol buffer instead.
void GetDebugInfo(DebugInfoVerbosity::Code verbosity, std::string* out) const;
private:
- explicit FlashIndexStorage(const std::string& index_filename,
- const Filesystem* filesystem,
+ explicit FlashIndexStorage(const Filesystem* filesystem,
+ std::string&& index_filename,
PostingListSerializer* serializer,
- bool has_in_memory_freelists);
+ bool has_in_memory_freelists)
+ : filesystem_(filesystem),
+ index_filename_(std::move(index_filename)),
+ serializer_(serializer),
+ num_blocks_(0),
+ has_in_memory_freelists_(has_in_memory_freelists) {}
// Init the index from persistence. Create if file does not exist. We do not
// erase corrupt files.
@@ -189,31 +246,38 @@ class FlashIndexStorage {
// the values stored in it.
bool OpenHeader(int64_t file_size);
- // Add the IndexBlock referred to by block_index in the on-disk free list with
- // index block_info_index.
+ // Adds the IndexBlock referred to by block_index in the on-disk free list
+ // with index block_info_index.
void AddToOnDiskFreeList(uint32_t block_index, int block_info_index,
IndexBlock* index_block);
- // Remove the IndexBlock referred to by block_index from the Header free list
+ // Removes the IndexBlock referred to by block_index from the Header free list
// with index block_info_index.
- void RemoveFromOnDiskFreeList(uint32_t block_index, int block_info_index,
- IndexBlock* index_block);
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status RemoveFromOnDiskFreeList(uint32_t block_index,
+ int block_info_index,
+ IndexBlock* index_block);
- // Returns:
+ // RETURNS:
// - On success, a valid PostingListHolder created from the first entry of
// the in-memory freelist at block_info_index
- // - NOT_FOUND if there was no entry in the freelist
- // - RESOURCE_EXHAUSTED if the PostingList in the freelist couldn't be
- // allocated for some reason.
+ // - OUT_OF_RANGE_ERROR if in_memory_freelists_ contains
+ // PostingListIdentifier with block_index >= num_blocks_
+ // - NOT_FOUND_ERROR if there was no entry in the freelist
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder>
GetPostingListFromInMemoryFreeList(int block_info_index);
- // Returns:
+ // RETURNS:
// - On success, a valid PostingListHolder created from the first entry of
// the on-disk freelist at block_info_index
- // - NOT_FOUND if there was no entry in the freelist
- // - RESOURCE_EXHAUSTED if the PostingList in the freelist couldn't be
- // allocated for some reason.
+ // - OUT_OF_RANGE_ERROR if header()->index_block_infos[block_info_index]
+ // contains block_index >= num_blocks_
+ // - NOT_FOUND_ERROR if there was no entry in the freelist
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder>
GetPostingListFromOnDiskFreeList(int block_info_index);
@@ -222,22 +286,24 @@ class FlashIndexStorage {
// IndexBlock.
// - RESOURCE_EXHAUSTED if the index couldn't be grown to fit a new
// IndexBlock.
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<PostingListHolder> AllocateNewPostingList(
int block_info_index);
// Returns:
// - On success, a newly created IndexBlock at block_index with posting
// lists of size posting_list_size
- // - INTERNAL_ERROR if unable to access the region in file representing the
- // IndexBlock
+ // - OUT_OF_RANGE_ERROR if block_index >= num_blocks_
+ // - Any IndexBlock errors
libtextclassifier3::StatusOr<IndexBlock> CreateIndexBlock(
- int block_index, uint32_t posting_list_size) const;
+ uint32_t block_index, uint32_t posting_list_size) const;
// Returns:
// - On success, the IndexBlock that exists at block_index
- // - INTERNAL_ERROR if unable to access the region in file representing the
- // IndexBlock
- libtextclassifier3::StatusOr<IndexBlock> GetIndexBlock(int block_index) const;
+ // - OUT_OF_RANGE_ERROR if block_index >= num_blocks_
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<IndexBlock> GetIndexBlock(
+ uint32_t block_index) const;
// Add a new block to the end of the file and return its block
// index. Returns kInvalidBlockIndex if unable to grow the index file.
@@ -249,13 +315,20 @@ class FlashIndexStorage {
int FindBestIndexBlockInfo(uint32_t posting_list_bytes) const;
// Flushes the in-memory free list to disk.
- void FlushInMemoryFreeList();
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status FlushInMemoryFreeList();
- // Underlying filename.
+ const Filesystem* filesystem_; // not owned; can't be null
std::string index_filename_;
+ PostingListSerializer* serializer_; // not owned; can't be null
+
// We open the index file into this fd.
- ScopedFd block_fd_;
+ ScopedFd storage_sfd_;
+
int num_blocks_; // can be inferred from index file size
std::unique_ptr<HeaderBlock> header_block_;
@@ -285,10 +358,6 @@ class FlashIndexStorage {
};
std::vector<FreeList> in_memory_freelists_;
- const Filesystem* filesystem_; // not owned; can't be null
-
- PostingListSerializer* serializer_; // not owned; can't be null
-
bool has_in_memory_freelists_;
};
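Per the commit description ("Improve IndexBlock by PRead/PWrite instead of repeating mmap/msync/unmap"), the IndexBlock changes below read block data into an in-memory copy and write it back explicitly, which is why the PostingListHolder comment above says the posting list must be synced to disk after modification. Here is a rough standalone sketch of that read-modify-write-back flow using POSIX pread/pwrite; ExampleBlockHeader and TouchHeader are made-up names for illustration, not the icing BlockHeader API.

// Illustrative sketch of a PRead/PWrite read-modify-write-back flow.
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>

#include <cstdint>
#include <cstdio>

// Stand-in for a small fixed-size block header stored at a known file offset.
struct ExampleBlockHeader {
  uint32_t next_block_index;
  uint32_t posting_list_bytes;
};

// Reads the header at block_file_offset into memory, modifies the copy, and
// writes the whole header back. Returns false on any I/O failure.
bool TouchHeader(int fd, off_t block_file_offset) {
  ExampleBlockHeader header;
  if (pread(fd, &header, sizeof(header), block_file_offset) !=
      static_cast<ssize_t>(sizeof(header))) {
    return false;  // analogous to the "PRead block header error" case below
  }
  header.next_block_index = 0;  // modify the in-memory copy
  return pwrite(fd, &header, sizeof(header), block_file_offset) ==
         static_cast<ssize_t>(sizeof(header));
}

int main() {
  int fd = open("/tmp/example_block_file", O_RDWR | O_CREAT, 0644);
  if (fd < 0) return 1;
  ExampleBlockHeader initial{1, 4096};
  pwrite(fd, &initial, sizeof(initial), 0);  // seed one header on disk
  std::printf("TouchHeader ok: %d\n", TouchHeader(fd, /*block_file_offset=*/0));
  close(fd);
  return 0;
}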
diff --git a/icing/file/posting_list/index-block.cc b/icing/file/posting_list/index-block.cc
index 87641b1..3fa397c 100644
--- a/icing/file/posting_list/index-block.cc
+++ b/icing/file/posting_list/index-block.cc
@@ -18,13 +18,14 @@
#include <cstdint>
#include <memory>
-#include <string_view>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/file/memory-mapped-file.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-free.h"
+#include "icing/file/posting_list/posting-list-used.h"
#include "icing/file/posting_list/posting-list-utils.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/logging.h"
@@ -55,84 +56,50 @@ libtextclassifier3::Status ValidatePostingListBytes(
} // namespace
-libtextclassifier3::StatusOr<IndexBlock>
+/* static */ libtextclassifier3::StatusOr<IndexBlock>
IndexBlock::CreateFromPreexistingIndexBlockRegion(
- const Filesystem& filesystem, std::string_view file_path,
- PostingListSerializer* serializer, off_t offset, uint32_t block_size) {
+ const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size) {
if (block_size < sizeof(BlockHeader)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Provided block_size %d is too small to fit even the BlockHeader!",
block_size));
}
- ICING_ASSIGN_OR_RETURN(MemoryMappedFile mmapped_file,
- MemoryMappedFile::Create(
- filesystem, file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
- IndexBlock block(serializer, std::move(mmapped_file));
+
+ BlockHeader header;
+ if (!filesystem->PRead(fd, &header, sizeof(BlockHeader), block_file_offset)) {
+ return absl_ports::InternalError("PRead block header error");
+ }
+
ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
- serializer, block.get_posting_list_bytes(), block_size));
- return block;
+ serializer, header.posting_list_bytes, block_size));
+
+ return IndexBlock(filesystem, serializer, fd, block_file_offset, block_size,
+ header.posting_list_bytes);
}
-libtextclassifier3::StatusOr<IndexBlock>
-IndexBlock::CreateFromUninitializedRegion(const Filesystem& filesystem,
- std::string_view file_path,
+/* static */ libtextclassifier3::StatusOr<IndexBlock>
+IndexBlock::CreateFromUninitializedRegion(const Filesystem* filesystem,
PostingListSerializer* serializer,
- off_t offset, uint32_t block_size,
+ int fd, off_t block_file_offset,
+ uint32_t block_size,
uint32_t posting_list_bytes) {
if (block_size < sizeof(BlockHeader)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Provided block_size %d is too small to fit even the BlockHeader!",
block_size));
}
+
ICING_RETURN_IF_ERROR(
ValidatePostingListBytes(serializer, posting_list_bytes, block_size));
- ICING_ASSIGN_OR_RETURN(MemoryMappedFile mmapped_file,
- MemoryMappedFile::Create(
- filesystem, file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
- IndexBlock block(serializer, std::move(mmapped_file));
- // Safe to ignore the return value of Reset. Reset returns an error if
- // posting_list_bytes is invalid, but this function ensures that
- // posting_list_bytes is valid thanks to the call to ValidatePostingListBytes
- // above.
- block.Reset(posting_list_bytes);
- return block;
-}
-
-IndexBlock::IndexBlock(PostingListSerializer* serializer,
- MemoryMappedFile&& mmapped_block)
- : header_(reinterpret_cast<BlockHeader*>(mmapped_block.mutable_region())),
- posting_lists_start_ptr_(mmapped_block.mutable_region() +
- sizeof(BlockHeader)),
- block_size_in_bytes_(mmapped_block.region_size()),
- serializer_(serializer),
- mmapped_block_(
- std::make_unique<MemoryMappedFile>(std::move(mmapped_block))) {}
-
-libtextclassifier3::Status IndexBlock::Reset(int posting_list_bytes) {
- ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
- serializer_, posting_list_bytes, mmapped_block_->region_size()));
- header_->free_list_posting_list_index = kInvalidPostingListIndex;
- header_->next_block_index = kInvalidBlockIndex;
- header_->posting_list_bytes = posting_list_bytes;
+ IndexBlock block(filesystem, serializer, fd, block_file_offset, block_size,
+ posting_list_bytes);
+ ICING_RETURN_IF_ERROR(block.Reset());
- // Starting with the last posting list, prepend each posting list to the free
- // list. At the end, the beginning of the free list should be the first
- // posting list.
- for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
- posting_list_index >= 0; --posting_list_index) {
- // Adding the posting list at posting_list_index to the free list will
- // modify both the posting list and also
- // header_->free_list_posting_list_index.
- FreePostingList(posting_list_index);
- }
- return libtextclassifier3::Status::OK;
+ return block;
}
-libtextclassifier3::StatusOr<PostingListUsed>
+libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
@@ -140,71 +107,226 @@ IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
"posting lists.",
posting_list_index, max_num_posting_lists()));
}
- return PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
- serializer_, get_posting_list_ptr(posting_list_index),
- get_posting_list_bytes());
+
+ // Read out the header from disk.
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+
+ // Read out the allocated posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
+ serializer_, std::move(posting_list_buffer), posting_list_bytes_));
+ return PostingListAndBlockInfo(
+ std::move(pl_used), posting_list_index, header.next_block_index,
+ /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
+ kInvalidPostingListIndex);
}
-libtextclassifier3::StatusOr<PostingListIndex>
+libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
IndexBlock::AllocatePostingList() {
- if (!has_free_posting_lists()) {
+ // Read out the header from disk.
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+
+ if (header.free_list_posting_list_index == kInvalidPostingListIndex) {
return absl_ports::ResourceExhaustedError(
"No available posting lists to allocate.");
}
// Pull one off the free list.
- PostingListIndex posting_list_index = header_->free_list_posting_list_index;
+ PostingListIndex posting_list_index = header.free_list_posting_list_index;
- // We know at this point that posting_list_bytes will return a valid pl size
- // (because an already initialized IndexBlock instance can't have an invalid
- // posting_list_bytes). So CreateFromPreexistingPostingListFreeRegion will
- // always return OK and ValueOrDie is safe to call.
- auto posting_list_or =
+ // Read out the posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+  // Step 1: get the next (chained) free posting list index and store it in
+  // the block header.
+ ICING_ASSIGN_OR_RETURN(
+ PostingListFree pl_free,
PostingListFree::CreateFromPreexistingPostingListFreeRegion(
- get_posting_list_ptr(posting_list_index), get_posting_list_bytes(),
+ posting_list_buffer.get(), posting_list_bytes_,
serializer_->GetDataTypeBytes(),
- serializer_->GetMinPostingListSize());
- PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
-
- header_->free_list_posting_list_index = plfree.get_next_posting_list_index();
- if (header_->free_list_posting_list_index != kInvalidPostingListIndex &&
- header_->free_list_posting_list_index >= max_num_posting_lists()) {
+ serializer_->GetMinPostingListSize()));
+ header.free_list_posting_list_index = pl_free.get_next_posting_list_index();
+ if (header.free_list_posting_list_index != kInvalidPostingListIndex &&
+ header.free_list_posting_list_index >= max_num_posting_lists()) {
ICING_LOG(ERROR)
<< "Free Posting List points to an invalid posting list index!";
- header_->free_list_posting_list_index = kInvalidPostingListIndex;
+ header.free_list_posting_list_index = kInvalidPostingListIndex;
}
- // Make it a used posting list.
- PostingListUsed::CreateFromUnitializedRegion(
- serializer_, get_posting_list_ptr(posting_list_index),
- get_posting_list_bytes());
- return posting_list_index;
+  // Step 2: create a PostingListUsed instance. The original content of the
+  // posting_list_buffer read above no longer matters:
+  // PostingListUsed::CreateFromUnitializedRegion allocates and initializes its
+  // own buffer, which we sync to disk below over the freed posting list.
+ ICING_ASSIGN_OR_RETURN(PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer_, posting_list_bytes_));
+
+  // Sync the initialized posting list (overwriting the original
+  // PostingListFree content) and the header to disk.
+ ICING_RETURN_IF_ERROR(
+ WritePostingList(posting_list_index, pl_used.posting_list_buffer()));
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return PostingListAndBlockInfo(
+ std::move(pl_used), posting_list_index, header.next_block_index,
+ /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
+ kInvalidPostingListIndex);
}
-void IndexBlock::FreePostingList(PostingListIndex posting_list_index) {
+libtextclassifier3::Status IndexBlock::FreePostingList(
+ PostingListIndex posting_list_index) {
if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
- ICING_LOG(ERROR) << "Cannot free posting list with index "
- << posting_list_index << " in IndexBlock with only "
- << max_num_posting_lists() << " posting lists.";
- return;
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot free posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::WritePostingListToDisk(
+ const PostingListUsed& posting_list_used,
+ PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot write posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ if (posting_list_used.size_in_bytes() != posting_list_bytes_) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot write posting list into a block with different posting list "
+ "bytes");
}
- // We know at this point that posting_list_bytes will return a valid pl size.
- // So CreateFromUninitializedRegion will always return OK and ValueOrDie is
- // safe to call.
- auto posting_list_or = PostingListFree::CreateFromUnitializedRegion(
- get_posting_list_ptr(posting_list_index), get_posting_list_bytes(),
- serializer_->GetDataTypeBytes(), serializer_->GetMinPostingListSize());
- PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
+ if (!posting_list_used.is_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Write the allocated posting list to disk.
+ return WritePostingList(posting_list_index,
+ posting_list_used.posting_list_buffer());
+}
+
+libtextclassifier3::StatusOr<uint32_t> IndexBlock::GetNextBlockIndex() const {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ return header.next_block_index;
+}
+
+libtextclassifier3::Status IndexBlock::SetNextBlockIndex(
+ uint32_t next_block_index) {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ header.next_block_index = next_block_index;
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<bool> IndexBlock::HasFreePostingLists() const {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ return header.free_list_posting_list_index != kInvalidPostingListIndex;
+}
+
+libtextclassifier3::Status IndexBlock::Reset() {
+ BlockHeader header;
+ header.free_list_posting_list_index = kInvalidPostingListIndex;
+ header.next_block_index = kInvalidBlockIndex;
+ header.posting_list_bytes = posting_list_bytes_;
+
+ // Starting with the last posting list, prepend each posting list to the free
+ // list. At the end, the beginning of the free list should be the first
+ // posting list.
+ for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
+ posting_list_index >= 0; --posting_list_index) {
+ // Adding the posting list at posting_list_index to the free list will
+ // modify both the posting list and also
+ // header.free_list_posting_list_index.
+ ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
+ }
+
+ // Sync the header to disk.
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::FreePostingListImpl(
+ BlockHeader& header, PostingListIndex posting_list_index) {
+ // Read out the posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+
+ ICING_ASSIGN_OR_RETURN(PostingListFree plfree,
+ PostingListFree::CreateFromUnitializedRegion(
+ posting_list_buffer.get(), posting_list_bytes(),
+ serializer_->GetDataTypeBytes(),
+ serializer_->GetMinPostingListSize()));
// Put at the head of the list.
- plfree.set_next_posting_list_index(header_->free_list_posting_list_index);
- header_->free_list_posting_list_index = posting_list_index;
+ plfree.set_next_posting_list_index(header.free_list_posting_list_index);
+ header.free_list_posting_list_index = posting_list_index;
+
+ // Sync the posting list to disk.
+ ICING_RETURN_IF_ERROR(
+ WritePostingList(posting_list_index, posting_list_buffer.get()));
+ return libtextclassifier3::Status::OK;
}
-char* IndexBlock::get_posting_list_ptr(PostingListIndex posting_list_index) {
- return posting_lists_start_ptr_ +
- get_posting_list_bytes() * posting_list_index;
+libtextclassifier3::StatusOr<IndexBlock::BlockHeader> IndexBlock::ReadHeader()
+ const {
+ BlockHeader header;
+ if (!filesystem_->PRead(fd_, &header, sizeof(BlockHeader),
+ block_file_offset_)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PRead block header error: ", strerror(errno)));
+ }
+ if (header.posting_list_bytes != posting_list_bytes_) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent posting list bytes between block header (%d) and class "
+ "instance (%d)",
+ header.posting_list_bytes, posting_list_bytes_));
+ }
+ return header;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>>
+IndexBlock::ReadPostingList(PostingListIndex posting_list_index) const {
+ auto posting_list_buffer = std::make_unique<uint8_t[]>(posting_list_bytes_);
+ if (!filesystem_->PRead(fd_, posting_list_buffer.get(), posting_list_bytes_,
+ get_posting_list_file_offset(posting_list_index))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PRead posting list error: ", strerror(errno)));
+ }
+ return posting_list_buffer;
+}
+
+libtextclassifier3::Status IndexBlock::WriteHeader(const BlockHeader& header) {
+ if (!filesystem_->PWrite(fd_, block_file_offset_, &header,
+ sizeof(BlockHeader))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PWrite block header error: ", strerror(errno)));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::WritePostingList(
+ PostingListIndex posting_list_index, const uint8_t* posting_list_buffer) {
+ if (!filesystem_->PWrite(fd_,
+ get_posting_list_file_offset(posting_list_index),
+ posting_list_buffer, posting_list_bytes_)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PWrite posting list error: ", strerror(errno)));
+ }
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
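
The free list that AllocatePostingList() and FreePostingListImpl() manipulate above is a singly-linked list threaded through the posting list buffers themselves: the header's free_list_posting_list_index points at the head, and each free posting list stores the index of the next one. A minimal sketch of walking that chain, written as if it were an extra private IndexBlock helper (hypothetical, for illustration only; it reuses only the members and helpers defined in this file):

// Hypothetical helper; not part of the real IndexBlock API.
libtextclassifier3::StatusOr<uint32_t> IndexBlock::CountFreePostingLists()
    const {
  ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
  uint32_t count = 0;
  PostingListIndex curr = header.free_list_posting_list_index;
  // Bound the walk by max_num_posting_lists() in case the chain is corrupted.
  while (curr != kInvalidPostingListIndex && count < max_num_posting_lists()) {
    ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> buffer,
                           ReadPostingList(curr));
    ICING_ASSIGN_OR_RETURN(
        PostingListFree pl_free,
        PostingListFree::CreateFromPreexistingPostingListFreeRegion(
            buffer.get(), posting_list_bytes_, serializer_->GetDataTypeBytes(),
            serializer_->GetMinPostingListSize()));
    ++count;
    curr = pl_free.get_next_posting_list_index();
  }
  return count;
}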
diff --git a/icing/file/posting_list/index-block.h b/icing/file/posting_list/index-block.h
index adbaf3d..21ad13f 100644
--- a/icing/file/posting_list/index-block.h
+++ b/icing/file/posting_list/index-block.h
@@ -19,9 +19,10 @@
#include <cstdint>
#include <memory>
-#include <string_view>
-#include "icing/file/memory-mapped-file.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/legacy/index/icing-bit-util.h"
@@ -31,15 +32,27 @@ namespace lib {
// This class is used to manage I/O to a single flash block and to manage the
// division of that flash block into PostingLists. It provides an interface to
-// allocate, free and read posting lists.
+// allocate, free and read posting lists. Note that IndexBlock is stateless:
+// - Any changes to block header will be synced to disk before the method
+// returns.
+// - Any posting list allocation/freeing will be synced to disk before the
+// method returns.
+// - When getting an allocated posting list, it PReads the contents from disk
+//   into a buffer and transfers ownership of that buffer to the returned
+//   PostingListUsed. Changes to the PostingListUsed are not visible to other
+//   instances until WritePostingListToDisk is called.
//
// An IndexBlock contains a small header and an array of fixed-size posting list
// buffers. Initially, all posting lists are chained in a singly-linked free
// list.
//
-// When we want to get a new PostingList from an IndexBlock, we just
-// pull one off the free list. When the user wants to return the
-// PostingList to the free pool, we prepend it to the free list.
+// When we want to get a new PostingList from an IndexBlock, we just pull one
+// off the free list. When the user wants to return the PostingList to the free
+// pool, we prepend it to the free list.
+//
+// Reading/writing the same block is NOT thread safe. Concurrent reads/writes
+// to the same block (whether through the same or different IndexBlock
+// instances) are a race condition, and the behavior is undefined.
class IndexBlock {
public:
// What is the maximum posting list size in bytes that can be stored in this
@@ -50,35 +63,57 @@ class IndexBlock {
data_type_bytes;
}
- // Create an IndexBlock to reference the previously used region of the
- // mmapped_file starting at offset with size block_size
+ // Creates an IndexBlock to reference the previously used region of the file
+ // descriptor starting at block_file_offset with size block_size.
+ //
+  // - serializer: used for reading/writing posting lists. The serializer also
+  //   provides additional information (e.g. data type size).
+  // - fd: a valid file descriptor opened for write by the caller.
+  // - block_file_offset: absolute offset in the file (fd) at which this block
+  //   starts.
+  // - block_size: byte size of this block.
+ //
+  // Unlike CreateFromUninitializedRegion, a pre-existing index block has
+  // already determined its posting list size and written it into the block
+  // header, so the size is read from the header and the caller doesn't have
+  // to provide it.
//
// RETURNS:
- // - a valid IndexBlock on success
- // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
- // if the posting list size stored in the region is not a valid posting
- // list size or it exceeds max_posting_list_bytes(size).
- // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
+ // - A valid IndexBlock instance on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If block_size is too small for even just the BlockHeader
+ // - If the posting list size stored in the region is not a valid posting
+ // list size (e.g. exceeds max_posting_list_bytes(size))
+ // - INTERNAL_ERROR on I/O error
static libtextclassifier3::StatusOr<IndexBlock>
- CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
- std::string_view file_path,
+ CreateFromPreexistingIndexBlockRegion(const Filesystem* filesystem,
PostingListSerializer* serializer,
- off_t offset, uint32_t block_size);
+ int fd, off_t block_file_offset,
+ uint32_t block_size);
- // Create an IndexBlock to reference an uninitialized region of the
- // mmapped_file starting at offset with size block_size. The IndexBlock will
- // initialize the region to be an empty IndexBlock with posting lists of size
- // posting_list_bytes.
+ // Creates an IndexBlock to reference an uninitialized region of the file
+ // descriptor starting at block_file_offset with size block_size. The
+ // IndexBlock will initialize the region to be an empty IndexBlock with
+ // posting lists of size posting_list_bytes.
+ //
+  // - serializer: used for reading/writing posting lists. The serializer also
+  //   provides additional information (e.g. data type size).
+  // - fd: a valid file descriptor opened for write by the caller.
+  // - block_file_offset: absolute offset in the file (fd) at which this block
+  //   starts.
+  // - block_size: byte size of this block.
+  // - posting_list_bytes: byte size of each posting list in this block. This
+  //   value is written into the block header.
//
// RETURNS:
- // - a valid IndexBlock on success
- // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
- // if posting_list_bytes is not a valid posting list size or it exceeds
- // max_posting_list_bytes(size).
- // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
+ // - A valid IndexBlock instance on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If block_size is too small for even just the BlockHeader
+  //     - If posting_list_bytes is not a valid posting list size (e.g. it
+  //       exceeds max_posting_list_bytes(size))
+ // - INTERNAL_ERROR on I/O error
static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion(
- const Filesystem& filesystem, std::string_view file_path,
- PostingListSerializer* serializer, off_t offset, uint32_t block_size,
+ const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size,
uint32_t posting_list_bytes);
IndexBlock(const IndexBlock&) = delete;
@@ -86,30 +121,53 @@ class IndexBlock {
IndexBlock(IndexBlock&&) = default;
IndexBlock& operator=(IndexBlock&&) = default;
- ~IndexBlock() {
- if (mmapped_block_ != nullptr) {
- mmapped_block_->PersistToDisk();
- }
- }
+ ~IndexBlock() = default;
+
+ struct PostingListAndBlockInfo {
+ PostingListUsed posting_list_used;
+ PostingListIndex posting_list_index;
+
+ uint32_t next_block_index;
+
+    // Flag indicating whether any free posting lists remain available in this
+    // block after handling this request.
+ bool has_free_posting_lists;
+
+ explicit PostingListAndBlockInfo(PostingListUsed&& posting_list_used_in,
+ PostingListIndex posting_list_index_in,
+ uint32_t next_block_index_in,
+ bool has_free_posting_lists_in)
+ : posting_list_used(std::move(posting_list_used_in)),
+ posting_list_index(posting_list_index_in),
+ next_block_index(next_block_index_in),
+ has_free_posting_lists(has_free_posting_lists_in) {}
+ };
- // Instantiate a PostingListUsed at posting_list_index with the existing
- // content in the IndexBlock.
+ // PReads existing posting list content at posting_list_index, instantiates a
+ // PostingListUsed, and returns it with some additional index block info.
//
// RETURNS:
- // - a valid PostingListUsed on success
- // - INVALID_ARGUMENT if posting_list_index >= max_num_posting_lists()
- libtextclassifier3::StatusOr<PostingListUsed> GetAllocatedPostingList(
+ // - A valid PostingListAndBlockInfo on success
+ // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
+ // >= max_num_posting_lists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<PostingListAndBlockInfo> GetAllocatedPostingList(
PostingListIndex posting_list_index);
- // Allocates a PostingListUsed in the IndexBlock, if possible.
+ // Allocates a PostingListUsed in the IndexBlock, initializes the content
+ // (by serializer), and returns the initialized PostingListUsed instance,
+ // PostingListIndex, and some additional index block info.
//
// RETURNS:
- // - a valid PostingListIndex that can be used to retrieve the allocated
- // PostingListUsed via a call to GetAllocatedPostingList
- // - RESOURCE_EXHAUSTED if !has_free_posting_lists()
- libtextclassifier3::StatusOr<PostingListIndex> AllocatePostingList();
+ // - A valid PostingListAndBlockInfo instance on success
+ // - RESOURCE_EXHAUSTED_ERROR if there is already no free posting list
+ // available, i.e. !HasFreePostingLists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<PostingListAndBlockInfo> AllocatePostingList();
- // Free posting list at posting_list_index.
+  // Frees the posting list at posting_list_index, adds it to the free list
+  // chain, and updates the block header. Both the freed posting list and the
+  // updated header are synced to disk.
//
// It is considered an error to "double-free" a posting list. You should never
// call FreePostingList(index) with the same index twice, unless that index
@@ -127,62 +185,69 @@ class IndexBlock {
// index = block.AllocatePostingList();
// DoSomethingElse(block.GetAllocatedPostingList(index));
// // A-Ok! We called AllocatePostingList() since the last FreePostingList()
- // call. block.FreePostingList(index);
+ // // call.
+ // block.FreePostingList(index);
//
- // Has no effect if posting_list_index >= max_num_posting_lists().
- void FreePostingList(PostingListIndex posting_list_index);
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
+ // >= max_num_posting_lists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status FreePostingList(
+ PostingListIndex posting_list_index);
- // Blocks can be chained. The interpretation of the chaining is up
- // to the caller.
- uint32_t next_block_index() const { return header_->next_block_index; }
+ // Writes back an allocated posting list (PostingListUsed) at
+ // posting_list_index to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If posting_list_index < 0 or posting_list_index >=
+ // max_num_posting_lists()
+ // - If posting_list_used.size_in_bytes() != posting_list_bytes_
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WritePostingListToDisk(
+ const PostingListUsed& posting_list_used,
+ PostingListIndex posting_list_index);
- void set_next_block_index(uint32_t next_block_index) {
- header_->next_block_index = next_block_index;
- }
+ // PReads to get the index of next block from block header. Blocks can be
+ // chained, and the interpretation of the chaining is up to the caller.
+ //
+ // RETURNS:
+ // - Next block index on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<uint32_t> GetNextBlockIndex() const;
+
+ // PWrites block header to set the index of next block.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status SetNextBlockIndex(uint32_t next_block_index);
+
+ // PReads to get whether or not there are available posting lists in the free
+ // list.
+ //
+ // RETURNS:
+ // - A bool value on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<bool> HasFreePostingLists() const;
// Retrieves the size (in bytes) of the posting lists in this IndexBlock.
- uint32_t get_posting_list_bytes() const {
- return header_->posting_list_bytes;
- }
+ uint32_t posting_list_bytes() const { return posting_list_bytes_; }
- // Maximum number of posting lists in the block.
+ // Retrieves maximum number of posting lists in the block.
uint32_t max_num_posting_lists() const {
- return total_posting_lists_bytes() / get_posting_list_bytes();
+ return total_posting_lists_bytes() / posting_list_bytes_;
}
- // Number of bits required to store the largest PostingListIndex in this
- // block.
+ // Retrieves number of bits required to store the largest PostingListIndex in
+ // this block.
int posting_list_index_bits() const {
return BitsToStore(max_num_posting_lists());
}
- // Returns whether or not there are available posting lists in the free list.
- bool has_free_posting_lists() const {
- return header_->free_list_posting_list_index != kInvalidPostingListIndex;
- }
-
private:
- // Assumes that mmapped_file already has established a valid mapping to the
- // requested block.
- explicit IndexBlock(PostingListSerializer* serializer,
- MemoryMappedFile&& mmapped_block);
-
- // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
- // all posting lists to the free list.
- //
- // RETURNS:
- // - OK, on success
- // - INVALID_ARGUMENT if posting_list_bytes is a valid posting list size.
- libtextclassifier3::Status Reset(int posting_list_bytes);
-
- char* get_posting_list_ptr(PostingListIndex posting_list_index);
-
- // Bytes in the block available for posting lists (minus header,
- // alignment, etc.).
- uint32_t total_posting_lists_bytes() const {
- return block_size_in_bytes_ - sizeof(BlockHeader);
- }
-
struct BlockHeader {
// Index of the next block if this block is being chained or part of a free
// list.
@@ -192,21 +257,110 @@ class IndexBlock {
// of the free list.
PostingListIndex free_list_posting_list_index;
- // The size of each posting list in the IndexBlock.
+    // The size of each posting list in the IndexBlock. This value is
+    // initialized once, when CreateFromUninitializedRegion is called, and
+    // remains unchanged afterwards.
uint32_t posting_list_bytes;
};
- // Pointer to the header of this block. The header is used to store info about
- // this block and its posting lists.
- BlockHeader* header_;
- // Pointer to the beginning of the posting lists region - the area the block
- // after the header.
- char* posting_lists_start_ptr_;
- uint32_t block_size_in_bytes_;
+
+ // Assumes that fd has been opened for write.
+ explicit IndexBlock(const Filesystem* filesystem,
+ PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size_in_bytes,
+ uint32_t posting_list_bytes)
+ : filesystem_(filesystem),
+ serializer_(serializer),
+ fd_(fd),
+ block_file_offset_(block_file_offset),
+ block_size_in_bytes_(block_size_in_bytes),
+ posting_list_bytes_(posting_list_bytes) {}
+
+ // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
+ // all posting lists to the free list.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Reset();
+
+  // Frees the posting list at posting_list_index, adds it to the free list
+  // chain, and updates the given in-memory block header instance.
+  //
+  // - This function exists to avoid redundant block header PWrites when
+  //   freeing multiple posting lists.
+  // - The caller provides the BlockHeader instance to be updated with the new
+  //   free list chain and is responsible for finally syncing it to disk.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists()
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status FreePostingListImpl(
+ BlockHeader& header, PostingListIndex posting_list_index);
+
+ // PReads block header from the file.
+ //
+ // RETURNS:
+ // - A BlockHeader instance on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<BlockHeader> ReadHeader() const;
+
+ // PReads posting list content at posting_list_index. Note that it can be a
+ // freed or allocated posting list.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists()
+ //
+ // RETURNS:
+ // - A data buffer with size = posting_list_bytes_ on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>> ReadPostingList(
+ PostingListIndex posting_list_index) const;
+
+ // PWrites block header to the file.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WriteHeader(const BlockHeader& header);
+
+ // PWrites posting list content at posting_list_index. Note that it can be a
+ // freed or allocated posting list.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists() and size of
+ // posting_list_buffer is posting_list_bytes_.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WritePostingList(
+ PostingListIndex posting_list_index, const uint8_t* posting_list_buffer);
+
+ // Retrieves the absolute file (fd) offset of a posting list at
+ // posting_list_index.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists()
+ off_t get_posting_list_file_offset(
+ PostingListIndex posting_list_index) const {
+ return block_file_offset_ + sizeof(BlockHeader) +
+ posting_list_bytes_ * posting_list_index;
+ }
+
+ // Retrieves the byte size in the block available for posting lists (excluding
+ // the size of block header).
+ uint32_t total_posting_lists_bytes() const {
+ return block_size_in_bytes_ - sizeof(BlockHeader);
+ }
+
+ const Filesystem* filesystem_; // Does not own.
PostingListSerializer* serializer_; // Does not own.
- // MemoryMappedFile used to interact with the underlying flash block.
- std::unique_ptr<MemoryMappedFile> mmapped_block_;
+ int fd_; // Does not own.
+
+ off_t block_file_offset_;
+ uint32_t block_size_in_bytes_;
+ uint32_t posting_list_bytes_;
};
} // namespace lib
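
For reference, the layout implied by get_posting_list_file_offset() and total_posting_lists_bytes() is a small header followed by a dense array of fixed-size posting lists. A worked sketch with the numbers the test comments below rely on (assuming sizeof(BlockHeader) == 12, as those comments state; the constants here are illustrative, not part of the API):

#include <cstdint>

// Layout arithmetic sketch; the values mirror the test comments below.
constexpr uint32_t kBlockSize = 4096;        // one flash block
constexpr uint32_t kBlockHeaderBytes = 12;   // sizeof(BlockHeader)
constexpr uint32_t kPostingListBytes = 20;   // size of each posting list

// Bytes available for posting lists: 4096 - 12 = 4084.
constexpr uint32_t kTotalPostingListBytes = kBlockSize - kBlockHeaderBytes;
// Posting lists per block: 4084 / 20 = 204, so indices 0..203 fit in 8 bits.
constexpr uint32_t kMaxNumPostingLists =
    kTotalPostingListBytes / kPostingListBytes;

// Posting list i starts at block_file_offset + 12 + 20 * i within the file.
constexpr int64_t PostingListFileOffset(int64_t block_file_offset, uint32_t i) {
  return block_file_offset + kBlockHeaderBytes + kPostingListBytes * i;
}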
diff --git a/icing/file/posting_list/index-block_test.cc b/icing/file/posting_list/index-block_test.cc
index adb2708..fcc134a 100644
--- a/icing/file/posting_list/index-block_test.cc
+++ b/icing/file/posting_list/index-block_test.cc
@@ -30,6 +30,8 @@ namespace {
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
static constexpr int kBlockSize = 4096;
@@ -40,8 +42,12 @@ class IndexBlockTest : public ::testing::Test {
flash_file_ = test_dir_ + "/0";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+ sfd_ = std::make_unique<ScopedFd>(
+ filesystem_.OpenForWrite(flash_file_.c_str()));
+ ASSERT_TRUE(sfd_->is_valid());
+
// Grow the file by one block for the IndexBlock to use.
- ASSERT_TRUE(filesystem_.Grow(flash_file_.c_str(), kBlockSize));
+ ASSERT_TRUE(filesystem_.Grow(sfd_->get(), kBlockSize));
// TODO: test different serializers
serializer_ = std::make_unique<PostingListHitSerializer>();
@@ -49,12 +55,14 @@ class IndexBlockTest : public ::testing::Test {
void TearDown() override {
serializer_.reset();
+ sfd_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
}
+ Filesystem filesystem_;
std::string test_dir_;
std::string flash_file_;
- Filesystem filesystem_;
+ std::unique_ptr<ScopedFd> sfd_;
std::unique_ptr<PostingListHitSerializer> serializer_;
};
@@ -65,9 +73,9 @@ TEST_F(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
- EXPECT_TRUE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
}
}
@@ -77,9 +85,9 @@ TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes1));
- EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes1));
+ EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes1));
// There should be (4096 - 12) / 20 = 204 posting lists
// (sizeof(BlockHeader)==12). We can store a PostingListIndex of 203 in only 8
// bits.
@@ -91,9 +99,9 @@ TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(), /*offset=*/0,
+ &filesystem_, serializer_.get(), sfd_->get(), /*offset=*/0,
kBlockSize, kPostingListBytes2));
- EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes2));
+ EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes2));
// There should be (4096 - 12) / 200 = 20 posting lists
// (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5
// bits.
@@ -116,32 +124,36 @@ TEST_F(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
- /*offset=*/0,
- /*block_size=*/kBlockSize, kPostingListBytes));
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
- ICING_ASSERT_OK_AND_ASSIGN(allocated_index, block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- block.GetAllocatedPostingList(allocated_index));
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info,
+ block.AllocatePostingList());
for (const Hit& hit : test_hits) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info.posting_list_used, hit));
}
EXPECT_THAT(
- serializer_->GetHits(&pl_used),
+ serializer_->GetHits(&alloc_info.posting_list_used),
IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
+
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info.posting_list_used, alloc_info.posting_list_index));
+ allocated_index = alloc_info.posting_list_index;
}
{
// Create an IndexBlock from the previously allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- block.GetAllocatedPostingList(allocated_index));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.GetAllocatedPostingList(allocated_index));
EXPECT_THAT(
- serializer_->GetHits(&pl_used),
+ serializer_->GetHits(&pl_block_info.posting_list_used),
IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
- EXPECT_TRUE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
}
}
@@ -168,58 +180,64 @@ TEST_F(IndexBlockTest, IndexBlockMultiplePostingLists) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
- ICING_ASSERT_OK_AND_ASSIGN(allocated_index_1, block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_1,
- block.GetAllocatedPostingList(allocated_index_1));
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
+ block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list1) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_1, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
// Add hits to the second posting list.
- ICING_ASSERT_OK_AND_ASSIGN(allocated_index_2, block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_2,
- block.GetAllocatedPostingList(allocated_index_2));
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
+ block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list2) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_2, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_2),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
+
+ // Write both posting lists to disk.
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info_1.posting_list_used, alloc_info_1.posting_list_index));
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info_2.posting_list_used, alloc_info_2.posting_list_index));
+ allocated_index_1 = alloc_info_1.posting_list_index;
+ allocated_index_2 = alloc_info_2.posting_list_index;
}
{
// Create an IndexBlock from the previously allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_1,
+ IndexBlock::PostingListAndBlockInfo pl_block_info_1,
block.GetAllocatedPostingList(allocated_index_1));
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&pl_block_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used_2,
+ IndexBlock::PostingListAndBlockInfo pl_block_info_2,
block.GetAllocatedPostingList(allocated_index_2));
- EXPECT_THAT(serializer_->GetHits(&pl_used_2),
+ EXPECT_THAT(serializer_->GetHits(&pl_block_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
}
}
@@ -229,7 +247,7 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
@@ -240,14 +258,13 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
- ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1,
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_1,
- block.GetAllocatedPostingList(allocated_index_1));
for (const Hit& hit : hits_in_posting_list1) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_1, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
@@ -259,45 +276,44 @@ TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
};
- ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2,
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
block.AllocatePostingList());
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_2,
- block.GetAllocatedPostingList(allocated_index_2));
for (const Hit& hit : hits_in_posting_list2) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_2, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_2),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
// Now free the first posting list. Then, reallocate it and fill it with a
// different set of hits.
- block.FreePostingList(allocated_index_1);
- EXPECT_TRUE(block.has_free_posting_lists());
+ block.FreePostingList(alloc_info_1.posting_list_index);
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
std::vector<Hit> hits_in_posting_list3{
Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88),
Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency),
Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2),
};
- ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3,
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_3,
block.AllocatePostingList());
- EXPECT_THAT(allocated_index_3, Eq(allocated_index_1));
- ICING_ASSERT_OK_AND_ASSIGN(pl_used_1,
- block.GetAllocatedPostingList(allocated_index_3));
+  EXPECT_THAT(alloc_info_3.posting_list_index,
+              Eq(alloc_info_1.posting_list_index));
for (const Hit& hit : hits_in_posting_list3) {
- ICING_ASSERT_OK(serializer_->PrependHit(&pl_used_1, hit));
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_3.posting_list_used, hit));
}
- EXPECT_THAT(serializer_->GetHits(&pl_used_1),
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_3.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list3.rbegin(),
hits_in_posting_list3.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_FALSE(block.has_free_posting_lists());
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
}
TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
@@ -309,29 +325,29 @@ TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
// next block index.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
- EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex));
- block.set_next_block_index(kSomeBlockIndex);
- EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
+ EXPECT_THAT(block.SetNextBlockIndex(kSomeBlockIndex), IsOk());
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
}
{
// Create an IndexBlock from this previously allocated file block and make
// sure that next_block_index is still set properly.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
- EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
}
{
// Create an IndexBlock, treating this file block as uninitialized. This
// reset the next_block_index to kInvalidBlockIndex.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
- filesystem_, flash_file_, serializer_.get(),
+ &filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
- EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
}
}
diff --git a/icing/file/posting_list/posting-list-accessor.cc b/icing/file/posting_list/posting-list-accessor.cc
index 00f4417..67d7a21 100644
--- a/icing/file/posting_list/posting-list-accessor.cc
+++ b/icing/file/posting_list/posting-list-accessor.cc
@@ -19,7 +19,6 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/util/status-macros.h"
@@ -27,19 +26,21 @@
namespace icing {
namespace lib {
-void PostingListAccessor::FlushPreexistingPostingList() {
- if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
- // If this is a max-sized posting list, then just keep track of the id for
- // chaining. It'll be flushed to disk when preexisting_posting_list_ is
- // destructed.
+libtextclassifier3::Status PostingListAccessor::FlushPreexistingPostingList() {
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ // If this is a max-sized posting list, then sync to disk and keep track of
+ // the id.
+ ICING_RETURN_IF_ERROR(
+ storage_->WritePostingListToDisk(*preexisting_posting_list_));
prev_block_identifier_ = preexisting_posting_list_->id;
} else {
// If this is NOT a max-sized posting list, then our data have outgrown this
// particular posting list. Move the data into the in-memory posting list
// and free this posting list.
//
- // Move will always succeed since posting_list_buffer_ is max_pl_bytes.
- GetSerializer()->MoveFrom(/*dst=*/&posting_list_buffer_,
+ // Move will always succeed since in_memory_posting_list_ is max_pl_bytes.
+ GetSerializer()->MoveFrom(/*dst=*/&in_memory_posting_list_,
/*src=*/&preexisting_posting_list_->posting_list);
// Now that all the contents of this posting list have been copied, there's
@@ -48,58 +49,78 @@ void PostingListAccessor::FlushPreexistingPostingList() {
storage_->FreePostingList(std::move(*preexisting_posting_list_));
}
preexisting_posting_list_.reset();
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status PostingListAccessor::FlushInMemoryPostingList() {
- // We exceeded max_pl_bytes(). Need to flush posting_list_buffer_ and update
- // the chain.
- uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- storage_->block_size(), GetSerializer()->GetDataTypeBytes());
+ // We exceeded max_pl_bytes(). Need to flush in_memory_posting_list_ and
+ // update the chain.
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
- storage_->AllocatePostingList(max_posting_list_bytes));
- holder.block.set_next_block_index(prev_block_identifier_.block_index());
+ storage_->AllocateAndChainMaxSizePostingList(
+ prev_block_identifier_.block_index()));
+ ICING_RETURN_IF_ERROR(
+ GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
+ /*src=*/&in_memory_posting_list_));
+ ICING_RETURN_IF_ERROR(storage_->WritePostingListToDisk(holder));
+
+  // Set prev block id only if persisting to disk succeeded.
prev_block_identifier_ = holder.id;
- return GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
- /*src=*/&posting_list_buffer_);
+ return libtextclassifier3::Status::OK;
}
PostingListAccessor::FinalizeResult PostingListAccessor::Finalize() && {
if (preexisting_posting_list_ != nullptr) {
- // Our data are already in an existing posting list. Nothing else to do, but
- // return its id.
- return FinalizeResult(libtextclassifier3::Status::OK,
- preexisting_posting_list_->id);
+ // Sync to disk.
+ return FinalizeResult(
+ storage_->WritePostingListToDisk(*preexisting_posting_list_),
+ preexisting_posting_list_->id);
}
- if (GetSerializer()->GetBytesUsed(&posting_list_buffer_) <= 0) {
+
+ if (GetSerializer()->GetBytesUsed(&in_memory_posting_list_) <= 0) {
return FinalizeResult(absl_ports::InvalidArgumentError(
"Can't finalize an empty PostingListAccessor. "
"There's nothing to Finalize!"),
PostingListIdentifier::kInvalid);
}
- uint32_t posting_list_bytes =
- GetSerializer()->GetMinPostingListSizeToFit(&posting_list_buffer_);
+
+ libtextclassifier3::StatusOr<PostingListHolder> holder_or;
if (prev_block_identifier_.is_valid()) {
- posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- storage_->block_size(), GetSerializer()->GetDataTypeBytes());
+    // If prev_block_identifier_ is valid, then there was already a max-sized
+    // posting list, so we have to allocate another max-sized posting list and
+    // chain them together.
+ holder_or = storage_->AllocateAndChainMaxSizePostingList(
+ prev_block_identifier_.block_index());
+ } else {
+    // Otherwise, this is the first posting list, so we can use a smaller
+    // posting list size. Note that even if a max-sized posting list is needed
+    // here, it is ok to call AllocatePostingList without setting the next
+    // block index: there is no previous posting list to chain, and
+    // AllocatePostingList will set the next block index to kInvalidBlockIndex.
+ uint32_t posting_list_bytes =
+ GetSerializer()->GetMinPostingListSizeToFit(&in_memory_posting_list_);
+ holder_or = storage_->AllocatePostingList(posting_list_bytes);
}
- auto holder_or = storage_->AllocatePostingList(posting_list_bytes);
+
if (!holder_or.ok()) {
return FinalizeResult(std::move(holder_or).status(),
prev_block_identifier_);
}
PostingListHolder holder = std::move(holder_or).ValueOrDie();
- if (prev_block_identifier_.is_valid()) {
- holder.block.set_next_block_index(prev_block_identifier_.block_index());
- }
// Move to allocated area. This should never actually return an error. We know
// that editor.posting_list() is valid because it wouldn't have successfully
- // returned by AllocatePostingList if it wasn't. We know posting_list_buffer_
- // is valid because we created it in-memory. And finally, we know that the
- // data from posting_list_buffer_ will fit in editor.posting_list() because we
- // requested it be at at least posting_list_bytes large.
+ // returned by AllocatePostingList if it wasn't. We know
+ // in_memory_posting_list_ is valid because we created it in-memory. And
+ // finally, we know that the data from in_memory_posting_list_ will fit in
+  // editor.posting_list() because we requested it be at least
+  // posting_list_bytes large.
auto status = GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
- /*src=*/&posting_list_buffer_);
+ /*src=*/&in_memory_posting_list_);
+ if (!status.ok()) {
+ return FinalizeResult(std::move(status), prev_block_identifier_);
+ }
+
+ status = storage_->WritePostingListToDisk(holder);
if (!status.ok()) {
return FinalizeResult(std::move(status), prev_block_identifier_);
}
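
FlushInMemoryPostingList() and the chaining branch of Finalize() above both rely on the same storage-level step: allocate a fresh max-sized posting list whose block points back at the previously written one. A minimal standalone sketch of that step, using only the FlashIndexStorage calls shown in this patch (the function name and the buffered-data placeholder are hypothetical; the real methods also move in_memory_posting_list_ into the new posting list before writing):

// Sketch only: chains one new max-sized posting list in front of prev_id.
libtextclassifier3::StatusOr<PostingListIdentifier> ChainOneMaxSizePostingList(
    FlashIndexStorage* storage, PostingListIdentifier prev_id) {
  // The new block's next_block_index is set to prev_id.block_index(), so the
  // newest posting list becomes the head of the chain.
  ICING_ASSIGN_OR_RETURN(
      PostingListHolder holder,
      storage->AllocateAndChainMaxSizePostingList(prev_id.block_index()));
  // ... serializer->MoveFrom(&holder.posting_list, &buffered_data) here ...
  ICING_RETURN_IF_ERROR(storage->WritePostingListToDisk(holder));
  return holder.id;
}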
diff --git a/icing/file/posting_list/posting-list-accessor.h b/icing/file/posting_list/posting-list-accessor.h
index bfdbeb9..91f1f2d 100644
--- a/icing/file/posting_list/posting-list-accessor.h
+++ b/icing/file/posting_list/posting-list-accessor.h
@@ -28,9 +28,8 @@ namespace lib {
// This class serves to:
// 1. Expose PostingListUseds to clients of FlashIndexStorage
-// 2. Ensure the corresponding instance of IndexBlock has the same lifecycle as
-// the instance of PostingListUsed that the client has access to, while
-// not exposing IndexBlock's api surface.
+// 2. Handle flushing posting lists properly, including choosing the most
+//    space-efficient posting list size and chaining max-sized posting lists
+//    correctly.
// 3. Ensure that PostingListUseds can only be freed by calling methods which
// will also properly maintain the FlashIndexStorage free list and prevent
// callers from modifying the Posting List after freeing.
@@ -65,27 +64,25 @@ class PostingListAccessor {
virtual PostingListSerializer* GetSerializer() = 0;
protected:
- explicit PostingListAccessor(
- FlashIndexStorage* storage,
- std::unique_ptr<uint8_t[]> posting_list_buffer_array,
- PostingListUsed posting_list_buffer)
+ explicit PostingListAccessor(FlashIndexStorage* storage,
+ PostingListUsed in_memory_posting_list)
: storage_(storage),
prev_block_identifier_(PostingListIdentifier::kInvalid),
- posting_list_buffer_array_(std::move(posting_list_buffer_array)),
- posting_list_buffer_(std::move(posting_list_buffer)),
+ in_memory_posting_list_(std::move(in_memory_posting_list)),
has_reached_posting_list_chain_end_(false) {}
// Flushes preexisting_posting_list_ to disk if it's a max-sized posting list
// and populates prev_block_identifier.
// If it's not a max-sized posting list, moves the contents of
- // preexisting_posting_list_ to posting_list_buffer_ and frees
+ // preexisting_posting_list_ to in_memory_posting_list_ and frees
// preexisting_posting_list_.
// Sets preexisting_posting_list_ to nullptr.
- void FlushPreexistingPostingList();
+ libtextclassifier3::Status FlushPreexistingPostingList();
- // Flushes posting_list_buffer_ to a max-sized posting list on disk, setting
- // its next pointer to prev_block_identifier_ and updating
- // prev_block_identifier_ to point to the just-written posting list.
+ // Flushes in_memory_posting_list_ to a max-sized posting list on disk, chains
+ // the newly allocated max-size posting list block by setting its next pointer
+ // to prev_block_identifier_, and updates prev_block_identifier_ to point to
+ // the newly allocated posting list.
libtextclassifier3::Status FlushInMemoryPostingList();
// Frees all posting lists in the posting list chain starting at
@@ -102,19 +99,15 @@ class PostingListAccessor {
// An editor to an existing posting list on disk. If available (non-NULL),
// we'll try to add all data to this posting list. Once this posting list
// fills up, we'll either 1) chain it (if a max-sized posting list) and put
- // future data in posting_list_buffer_ or 2) copy all of its data into
- // posting_list_buffer_ and free this pl (if not a max-sized posting list).
+ // future data in in_memory_posting_list_ or 2) copy all of its data into
+ // in_memory_posting_list_ and free this pl (if not a max-sized posting list).
// TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using
// existing posting lists has on latency.
std::unique_ptr<PostingListHolder> preexisting_posting_list_;
// In-memory posting list used to buffer data before writing them to the
// smallest on-disk posting list that will fit them.
- // posting_list_buffer_array_ owns the memory region that posting_list_buffer_
- // interprets. Therefore, posting_list_buffer_array_ must have the same
- // lifecycle as posting_list_buffer_.
- std::unique_ptr<uint8_t[]> posting_list_buffer_array_;
- PostingListUsed posting_list_buffer_;
+ PostingListUsed in_memory_posting_list_;
bool has_reached_posting_list_chain_end_;
};
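
Taken together, the posting-list-accessor changes keep a single in-memory posting list as the write buffer and propagate flush failures through a Status instead of swallowing them. A minimal, self-contained sketch of that buffer-then-flush pattern follows; the names (Status, InMemoryList, DiskChain, Accessor) are simplified stand-ins for illustration, not Icing APIs.

    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Simplified stand-in for libtextclassifier3::Status.
    struct Status {
      bool ok = true;
    };

    // Buffers hits in memory until the caller flushes them.
    class InMemoryList {
     public:
      explicit InMemoryList(size_t capacity) : capacity_(capacity) {}
      bool Full() const { return hits_.size() >= capacity_; }
      void Add(int64_t hit) { hits_.push_back(hit); }
      std::vector<int64_t> Drain() { return std::exchange(hits_, {}); }

     private:
      size_t capacity_;
      std::vector<int64_t> hits_;
    };

    // Simplified stand-in for the chain of max-sized posting lists on disk.
    class DiskChain {
     public:
      // Pretend write; a real implementation would report I/O errors here.
      Status Append(std::vector<int64_t> hits) {
        blocks_.push_back(std::move(hits));
        return {};
      }

     private:
      std::vector<std::vector<int64_t>> blocks_;
    };

    class Accessor {
     public:
      Accessor(DiskChain* chain, size_t capacity)
          : chain_(chain), buffer_(capacity) {}

      // Mirrors the add-then-flush-when-full flow: a failed flush is surfaced
      // to the caller instead of being dropped.
      Status Add(int64_t hit) {
        if (buffer_.Full()) {
          Status s = Flush();  // analogous to FlushInMemoryPostingList()
          if (!s.ok) return s;
        }
        buffer_.Add(hit);
        return {};
      }

      Status Flush() { return chain_->Append(buffer_.Drain()); }

     private:
      DiskChain* chain_;
      InMemoryList buffer_;
    };

    int main() {
      DiskChain chain;
      Accessor accessor(&chain, /*capacity=*/3);
      for (int64_t hit = 0; hit < 10; ++hit) {
        if (!accessor.Add(hit).ok) return 1;
      }
      return accessor.Flush().ok ? 0 : 1;
    }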
diff --git a/icing/file/posting_list/posting-list-identifier.h b/icing/file/posting_list/posting-list-identifier.h
index 54b2888..78821e8 100644
--- a/icing/file/posting_list/posting-list-identifier.h
+++ b/icing/file/posting_list/posting-list-identifier.h
@@ -78,7 +78,7 @@ class PostingListIdentifier {
/*val=*/block_index);
}
- int block_index() const {
+ uint32_t block_index() const {
return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits);
}
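
For context, a PostingListIdentifier packs a block index and a posting-list index into one integer using bitfield macros; the hunk above only widens the accessor's return type to uint32_t. A rough standalone sketch of that packing, with bit widths chosen arbitrarily here rather than taken from the real constants, looks like:

    #include <cassert>
    #include <cstdint>

    // Bit widths are made up for illustration; only the shift/mask technique
    // mirrors what BITFIELD_GET does in the real code.
    constexpr int kPostingListIndexBits = 12;
    constexpr int kBlockIndexBits = 32 - 1 - kPostingListIndexBits;

    constexpr uint32_t Pack(uint32_t block_index, uint32_t posting_list_index) {
      return (block_index << kPostingListIndexBits) | posting_list_index;
    }

    constexpr uint32_t UnpackBlockIndex(uint32_t val) {
      // Returning uint32_t (as block_index() now does) keeps the value
      // unsigned, avoiding sign-conversion surprises when callers compare it
      // against other unsigned block counts.
      return (val >> kPostingListIndexBits) & ((1u << kBlockIndexBits) - 1);
    }

    int main() {
      uint32_t id = Pack(/*block_index=*/123, /*posting_list_index=*/7);
      assert(UnpackBlockIndex(id) == 123u);
      return 0;
    }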
diff --git a/icing/file/posting_list/posting-list-used.cc b/icing/file/posting_list/posting-list-used.cc
index 153d7f2..d049b13 100644
--- a/icing/file/posting_list/posting-list-used.cc
+++ b/icing/file/posting_list/posting-list-used.cc
@@ -15,6 +15,7 @@
#include "icing/file/posting_list/posting-list-used.h"
#include <cstdint>
+#include <memory>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
@@ -27,8 +28,8 @@ namespace lib {
libtextclassifier3::StatusOr<PostingListUsed>
PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
- PostingListSerializer* serializer, void* posting_list_buffer,
- uint32_t size_in_bytes) {
+ PostingListSerializer* serializer,
+ std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes) {
ICING_RETURN_ERROR_IF_NULL(serializer);
ICING_RETURN_ERROR_IF_NULL(posting_list_buffer);
@@ -38,16 +39,17 @@ PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Requested posting list size %d is invalid!", size_in_bytes));
}
- return PostingListUsed(posting_list_buffer, size_in_bytes);
+ return PostingListUsed(std::move(posting_list_buffer), size_in_bytes);
}
libtextclassifier3::StatusOr<PostingListUsed>
PostingListUsed::CreateFromUnitializedRegion(PostingListSerializer* serializer,
- void* posting_list_buffer,
uint32_t size_in_bytes) {
- ICING_ASSIGN_OR_RETURN(PostingListUsed posting_list_used,
- CreateFromPreexistingPostingListUsedRegion(
- serializer, posting_list_buffer, size_in_bytes));
+ ICING_ASSIGN_OR_RETURN(
+ PostingListUsed posting_list_used,
+ CreateFromPreexistingPostingListUsedRegion(
+ serializer, std::make_unique<uint8_t[]>(size_in_bytes),
+ size_in_bytes));
serializer->Clear(&posting_list_used);
return posting_list_used;
}
diff --git a/icing/file/posting_list/posting-list-used.h b/icing/file/posting_list/posting-list-used.h
index 6f68902..980d210 100644
--- a/icing/file/posting_list/posting-list-used.h
+++ b/icing/file/posting_list/posting-list-used.h
@@ -16,6 +16,7 @@
#define ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
#include <cstdint>
+#include <memory>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -106,57 +107,65 @@ class PostingListSerializer {
PostingListUsed* src) const = 0;
};
-// A posting list with data in it. Layout depends on the serializer.
+// A posting list with in-memory data. The caller should sync it to disk via
+// FlashIndexStorage. Layout depends on the serializer.
class PostingListUsed {
public:
- // Creates a PostingListUsed that points to a buffer of size_in_bytes bytes.
- // 'Preexisting' means that posting_list_buffer was previously modified by
- // another instance of PostingListUsed.
- //
- // Caller owns the data buffer and must not free it while using a
- // PostingListUsed.
+ // Creates a PostingListUsed that takes over the ownership of
+ // posting_list_buffer with size_in_bytes bytes. 'Preexisting' means that
+ // the data in posting_list_buffer was previously modified by another instance
+ // of PostingListUsed, and the caller should read the data from disk to
+ // posting_list_buffer.
//
// RETURNS:
// - A valid PostingListUsed if successful
// - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
- // fails
+ // fails
// - FAILED_PRECONDITION if serializer or posting_list_buffer is null
static libtextclassifier3::StatusOr<PostingListUsed>
- CreateFromPreexistingPostingListUsedRegion(PostingListSerializer* serializer,
- void* posting_list_buffer,
- uint32_t size_in_bytes);
+ CreateFromPreexistingPostingListUsedRegion(
+ PostingListSerializer* serializer,
+ std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes);
- // Creates a PostingListUsed that points to a buffer of size_in_bytes bytes
- // and initializes the content of the buffer so that the returned
- // PostingListUsed is empty.
- //
- // Caller owns the posting_list_buffer buffer and must not free it while using
- // a PostingListUsed.
+ // Creates a PostingListUsed that owns a buffer of size_in_bytes bytes and
+ // initializes the content of the buffer so that the returned PostingListUsed
+ // is empty.
//
// RETURNS:
// - A valid PostingListUsed if successful
// - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
- // fails
- // - FAILED_PRECONDITION if serializer or posting_list_buffer is null
+ // fails
+ // - FAILED_PRECONDITION if serializer is null
static libtextclassifier3::StatusOr<PostingListUsed>
CreateFromUnitializedRegion(PostingListSerializer* serializer,
- void* posting_list_buffer,
uint32_t size_in_bytes);
- uint8_t* posting_list_buffer() { return posting_list_buffer_; }
- const uint8_t* posting_list_buffer() const { return posting_list_buffer_; }
+ uint8_t* posting_list_buffer() {
+ is_dirty_ = true;
+ return posting_list_buffer_.get();
+ }
+
+ const uint8_t* posting_list_buffer() const {
+ return posting_list_buffer_.get();
+ }
uint32_t size_in_bytes() const { return size_in_bytes_; }
+ bool is_dirty() const { return is_dirty_; }
+
private:
- explicit PostingListUsed(void* posting_list_buffer, uint32_t size_in_bytes)
- : posting_list_buffer_(static_cast<uint8_t*>(posting_list_buffer)),
- size_in_bytes_(size_in_bytes) {}
+ explicit PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ uint32_t size_in_bytes)
+ : posting_list_buffer_(std::move(posting_list_buffer)),
+ size_in_bytes_(size_in_bytes),
+ is_dirty_(false) {}
// A byte array of size size_in_bytes_ containing encoded data for this
// posting list.
- uint8_t* posting_list_buffer_; // does not own!
+ std::unique_ptr<uint8_t[]> posting_list_buffer_;
uint32_t size_in_bytes_;
+
+ bool is_dirty_;
};
} // namespace lib
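
The net effect of the posting-list-used.h changes is that PostingListUsed now owns its backing array through a std::unique_ptr<uint8_t[]> and marks itself dirty whenever a mutable pointer is handed out, so a later sync step can skip clean buffers. A small sketch of that owning-buffer-plus-dirty-flag idiom, with hypothetical names rather than the real interface:

    #include <cassert>
    #include <cstdint>
    #include <memory>

    // Simplified stand-in; not the real PostingListUsed interface.
    class OwnedRegion {
     public:
      static OwnedRegion CreateUninitialized(uint32_t size_in_bytes) {
        return OwnedRegion(std::make_unique<uint8_t[]>(size_in_bytes),
                           size_in_bytes);
      }

      // Handing out a mutable pointer marks the region dirty, so callers that
      // later persist regions can skip clean ones.
      uint8_t* data() {
        is_dirty_ = true;
        return buffer_.get();
      }
      const uint8_t* data() const { return buffer_.get(); }

      uint32_t size_in_bytes() const { return size_in_bytes_; }
      bool is_dirty() const { return is_dirty_; }

     private:
      OwnedRegion(std::unique_ptr<uint8_t[]> buffer, uint32_t size_in_bytes)
          : buffer_(std::move(buffer)), size_in_bytes_(size_in_bytes) {}

      std::unique_ptr<uint8_t[]> buffer_;
      uint32_t size_in_bytes_;
      bool is_dirty_ = false;
    };

    int main() {
      OwnedRegion region = OwnedRegion::CreateUninitialized(/*size_in_bytes=*/64);
      assert(!region.is_dirty());
      region.data()[0] = 42;  // mutable access flips the dirty bit
      assert(region.is_dirty());
      return 0;
    }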
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index dd43204..5321d42 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -36,7 +36,7 @@
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index.h"
#include "icing/join/join-processor.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/endian.h"
@@ -74,7 +74,6 @@
#include "icing/scoring/scoring-processor.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
-#include "icing/store/suggestion-result-checker-impl.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
@@ -209,120 +208,6 @@ libtextclassifier3::Status ValidateSuggestionSpec(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<
- std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>>
-PopulateDocumentIdFilters(
- const DocumentStore* document_store,
- const icing::lib::SuggestionSpecProto& suggestion_spec,
- const std::unordered_set<NamespaceId>& namespace_ids) {
- std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
- document_id_filter_map;
- document_id_filter_map.reserve(suggestion_spec.document_uri_filters_size());
- for (const NamespaceDocumentUriGroup& namespace_document_uri_group :
- suggestion_spec.document_uri_filters()) {
- auto namespace_id_or = document_store->GetNamespaceId(
- namespace_document_uri_group.namespace_());
- if (!namespace_id_or.ok()) {
- // The current namespace doesn't exist.
- continue;
- }
- NamespaceId namespace_id = namespace_id_or.ValueOrDie();
- if (!namespace_ids.empty() &&
- namespace_ids.find(namespace_id) == namespace_ids.end()) {
- // The current namespace doesn't appear in the namespace filter.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The namespace : ", namespace_document_uri_group.namespace_(),
- " appears in the document uri filter, but doesn't appear in the "
- "namespace filter."));
- }
-
- if (namespace_document_uri_group.document_uris().empty()) {
- // Client should use namespace filter to filter out all document under
- // a namespace.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The namespace : ", namespace_document_uri_group.namespace_(),
- " has empty document uri in the document uri filter. Please use the "
- "namespace filter to exclude a namespace instead of the document uri "
- "filter."));
- }
-
- // Translate namespace document Uris into document_ids
- std::unordered_set<DocumentId> target_document_ids;
- target_document_ids.reserve(
- namespace_document_uri_group.document_uris_size());
- for (std::string_view document_uri :
- namespace_document_uri_group.document_uris()) {
- auto document_id_or = document_store->GetDocumentId(
- namespace_document_uri_group.namespace_(), document_uri);
- if (!document_id_or.ok()) {
- continue;
- }
- target_document_ids.insert(document_id_or.ValueOrDie());
- }
- document_id_filter_map.insert({namespace_id, target_document_ids});
- }
- return document_id_filter_map;
-}
-
-libtextclassifier3::StatusOr<std::unordered_map<SchemaTypeId, SectionIdMask>>
-PopulatePropertyFilters(
- const SchemaStore* schema_store,
- const icing::lib::SuggestionSpecProto& suggestion_spec,
- const std::unordered_set<SchemaTypeId>& schema_type_ids) {
- std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map;
- property_filter_map.reserve(suggestion_spec.type_property_filters_size());
- for (const TypePropertyMask& type_field_mask :
- suggestion_spec.type_property_filters()) {
- auto schema_type_id_or =
- schema_store->GetSchemaTypeId(type_field_mask.schema_type());
- if (!schema_type_id_or.ok()) {
- // The current schema doesn't exist
- continue;
- }
- SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
-
- if (!schema_type_ids.empty() &&
- schema_type_ids.find(schema_type_id) == schema_type_ids.end()) {
- // The current schema type doesn't appear in the schema type filter.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The schema : ", type_field_mask.schema_type(),
- " appears in the property filter, but doesn't appear in the schema"
- " type filter."));
- }
-
- if (type_field_mask.paths().empty()) {
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "The schema type : ", type_field_mask.schema_type(),
- " has empty path in the property filter. Please use the schema type"
- " filter to exclude a schema type instead of the property filter."));
- }
-
- // Translate property paths into section id mask
- SectionIdMask section_mask = kSectionIdMaskNone;
- auto section_metadata_list_or =
- schema_store->GetSectionMetadata(type_field_mask.schema_type());
- if (!section_metadata_list_or.ok()) {
- // The current schema doesn't has section metadata.
- continue;
- }
- std::unordered_set<std::string> target_property_paths;
- target_property_paths.reserve(type_field_mask.paths_size());
- for (const std::string& target_property_path : type_field_mask.paths()) {
- target_property_paths.insert(target_property_path);
- }
- const std::vector<SectionMetadata>* section_metadata_list =
- section_metadata_list_or.ValueOrDie();
- for (const SectionMetadata& section_metadata : *section_metadata_list) {
- if (target_property_paths.find(section_metadata.path) !=
- target_property_paths.end()) {
- section_mask |= UINT64_C(1) << section_metadata.id;
- }
- }
- property_filter_map.insert({schema_type_id, section_mask});
- }
- return property_filter_map;
-}
-
// Document store files are in a standalone subfolder for easier file
// management. We can delete and recreate the subfolder and not touch/affect
// anything else.
@@ -662,13 +547,6 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
std::string marker_filepath =
MakeSetSchemaMarkerFilePath(options_.base_dir());
- // TODO(b/249829533): switch to use persistent numeric index after
- // implementing and initialize numeric index.
- TC3_ASSIGN_OR_RETURN(
- integer_index_,
- DummyNumericIndex<int64_t>::Create(
- *filesystem_, MakeIntegerIndexWorkingPath(options_.base_dir())));
-
libtextclassifier3::Status index_init_status;
if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
// The schema was either lost or never set before. Wipe out the doc store
@@ -676,10 +554,14 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
const std::string doc_store_dir =
MakeDocumentDirectoryPath(options_.base_dir());
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ const std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
- !filesystem_->DeleteDirectoryRecursively(index_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Could not delete directories: ", index_dir, " and ", doc_store_dir));
+ !filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
+ !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Could not delete directories: ", index_dir, ", ",
+ integer_index_dir, " and ", doc_store_dir));
}
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -694,7 +576,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
/*force_recovery_and_revalidate_documents=*/true, initialize_stats));
// We're going to need to build the index from scratch. So just delete its
- // files now.
+ // directory now.
+ // Discard index directory and instantiate a new one.
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
Index::Options index_options(index_dir, options_.index_merge_size());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
@@ -706,6 +589,15 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
Index::Create(index_options, filesystem_.get(),
icing_filesystem_.get()));
+ // Discard integer index directory and instantiate a new one.
+ std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ ICING_RETURN_IF_ERROR(
+ IntegerIndex::Discard(*filesystem_, integer_index_dir));
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_,
+ IntegerIndex::Create(*filesystem_, std::move(integer_index_dir)));
+
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
index_init_status = std::move(restore_result.status);
@@ -724,6 +616,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
restore_timer->GetElapsedMilliseconds());
initialize_stats->set_index_restoration_cause(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
} else {
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -796,7 +690,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
}
Index::Options index_options(index_dir, options_.index_merge_size());
- InitializeStatsProto::RecoveryCause recovery_cause;
+ InitializeStatsProto::RecoveryCause index_recovery_cause;
auto index_or =
Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
if (!index_or.ok()) {
@@ -806,7 +700,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
absl_ports::StrCat("Could not recreate directory: ", index_dir));
}
- recovery_cause = InitializeStatsProto::IO_ERROR;
+ index_recovery_cause = InitializeStatsProto::IO_ERROR;
// Try recreating it from scratch and re-indexing everything.
ICING_ASSIGN_OR_RETURN(index_,
@@ -817,15 +711,46 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
index_ = std::move(index_or).ValueOrDie();
// If a recover does have to happen, then it must be because the index is
// out of sync with the document store.
- recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ index_recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
+
+ std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
+ auto integer_index_or = IntegerIndex::Create(*filesystem_, integer_index_dir);
+ if (!integer_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(
+ IntegerIndex::Discard(*filesystem_, integer_index_dir));
+
+ integer_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+ // Try recreating it from scratch and re-indexing everything.
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_,
+ IntegerIndex::Create(*filesystem_, std::move(integer_index_dir)));
+ } else {
+ // Integer index was created fine.
+ integer_index_ = std::move(integer_index_or).ValueOrDie();
+ // If a recovery does have to happen, then it must be because the integer
+ // index is out of sync with the document store.
+ integer_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
}
std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
IndexRestorationResult restore_result = RestoreIndexIfNeeded();
- if (restore_result.needed_restoration) {
+ if (restore_result.index_needed_restoration ||
+ restore_result.integer_index_needed_restoration) {
initialize_stats->set_index_restoration_latency_ms(
restore_timer->GetElapsedMilliseconds());
- initialize_stats->set_index_restoration_cause(recovery_cause);
+
+ if (restore_result.index_needed_restoration) {
+ initialize_stats->set_index_restoration_cause(index_recovery_cause);
+ }
+ if (restore_result.integer_index_needed_restoration) {
+ initialize_stats->set_integer_index_restoration_cause(
+ integer_index_recovery_cause);
+ }
}
return restore_result.status;
}
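
Both the term index and the new integer index now follow the same open-or-recover shape in InitializeIndex: try to open, discard and recreate on failure, and remember a recovery cause that is only reported if a restoration actually happens. A condensed, standalone sketch of that shape (Open, Discard, Handle, and Cause are placeholders, not Icing APIs):

    #include <optional>
    #include <string>

    enum class Cause { kNone, kIoError, kInconsistentWithGroundTruth };

    struct Handle {};  // placeholder for an opened index

    // Placeholders standing in for calls like IntegerIndex::Create / Discard.
    std::optional<Handle> Open(const std::string& /*dir*/) { return Handle{}; }
    void Discard(const std::string& /*dir*/) {}

    struct Opened {
      Handle handle;
      // Reported to initialization stats only if a restoration happens later.
      Cause cause;
    };

    Opened OpenOrRecover(const std::string& dir) {
      if (std::optional<Handle> h = Open(dir); h.has_value()) {
        // Opened fine; if a rebuild still turns out to be needed, it is
        // because the index is out of sync with the document store.
        return {*h, Cause::kInconsistentWithGroundTruth};
      }
      // Could not open: throw the directory away and recreate it empty.
      Discard(dir);
      return {Open(dir).value(), Cause::kIoError};
    }

    int main() {
      // Paths are illustrative only.
      Opened term = OpenOrRecover("/tmp/term_index");
      Opened integer = OpenOrRecover("/tmp/integer_index");
      (void)term;
      (void)integer;
      return 0;
    }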
@@ -942,14 +867,8 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
}
if (lost_previous_schema || index_incompatible) {
- // Clears all index files
- status = index_->Reset();
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
- }
-
- status = integer_index_->Reset();
+ // Clears all indices
+ status = ClearIndices();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
@@ -1080,7 +999,7 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
ICING_LOG(ERROR) << "Got an internal error from the index. Trying to "
"rebuild the index!\n"
<< index_status.error_message();
- index_status = index_->Reset();
+ index_status = ClearIndices();
if (index_status.ok()) {
index_status = RestoreIndexIfNeeded().status;
if (!index_status.ok()) {
@@ -1088,7 +1007,7 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
"indexing a document.";
}
} else {
- ICING_LOG(ERROR) << "Failed to reset the index after a failure of "
+ ICING_LOG(ERROR) << "Failed to clear the index after a failure of "
"indexing a document.";
}
}
@@ -1502,10 +1421,19 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
<< index_optimize_status.error_message();
should_rebuild_index = true;
}
+
+ libtextclassifier3::Status integer_index_optimize_status =
+ integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
+ document_store_->last_added_document_id());
+ if (!integer_index_optimize_status.ok()) {
+ ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
+ << integer_index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
}
// If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
// valid document store, but it might be the old one or the new one. So throw
- // out the index and rebuild from scratch.
+ // out the index data and rebuild from scratch.
// Likewise, if Index::Optimize failed, then attempt to recover the index by
// rebuilding from scratch.
// If ShouldRebuildIndex() returns true, we will also rebuild the index for
@@ -1513,26 +1441,13 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
if (should_rebuild_index) {
optimize_stats->set_index_restoration_mode(
OptimizeStatsProto::FULL_INDEX_REBUILD);
- ICING_LOG(WARNING) << "Resetting the entire index!";
-
- // Reset string index
- libtextclassifier3::Status index_reset_status = index_->Reset();
- if (!index_reset_status.ok()) {
- status = absl_ports::Annotate(
- absl_ports::InternalError("Failed to reset index."),
- index_reset_status.error_message());
- TransformStatus(status, result_status);
- optimize_stats->set_index_restoration_latency_ms(
- optimize_index_timer->GetElapsedMilliseconds());
- return result_proto;
- }
+ ICING_LOG(WARNING) << "Clearing the entire index!";
- // Reset integer index
- index_reset_status = integer_index_->Reset();
- if (!index_reset_status.ok()) {
+ libtextclassifier3::Status index_clear_status = ClearIndices();
+ if (!index_clear_status.ok()) {
status = absl_ports::Annotate(
- absl_ports::InternalError("Failed to reset integer index."),
- index_reset_status.error_message());
+ absl_ports::InternalError("Failed to clear index."),
+ index_clear_status.error_message());
TransformStatus(status, result_status);
optimize_stats->set_index_restoration_latency_ms(
optimize_index_timer->GetElapsedMilliseconds());
@@ -2180,52 +2095,49 @@ IcingSearchEngine::IndexRestorationResult
IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
- DocumentId last_indexed_document_id = index_->last_added_document_id();
-
- if (last_stored_document_id == last_indexed_document_id) {
+ if (last_stored_document_id == index_->last_added_document_id() &&
+ last_stored_document_id == integer_index_->last_added_document_id()) {
// No need to recover.
- return {libtextclassifier3::Status::OK, false};
+ return {libtextclassifier3::Status::OK, false, false};
}
if (last_stored_document_id == kInvalidDocumentId) {
- // Document store is empty but index is not. Reset the index.
- return {index_->Reset(), false};
+ // Document store is empty but index is not. Clear the index.
+ return {ClearIndices(), false, false};
}
- // TruncateTo ensures that the index does not hold any data that is not
- // present in the ground truth. If the document store lost some documents,
- // TruncateTo will ensure that the index does not contain any hits from those
- // lost documents. If the index does not contain any hits for documents with
- // document id greater than last_stored_document_id, then TruncateTo will have
- // no effect.
- auto status = index_->TruncateTo(last_stored_document_id);
- if (!status.ok()) {
- return {status, false};
+ // Truncate indices first.
+ auto truncate_result_or = TruncateIndicesTo(last_stored_document_id);
+ if (!truncate_result_or.ok()) {
+ return {std::move(truncate_result_or).status(), false, false};
}
- // Last indexed document id may have changed thanks to TruncateTo.
- last_indexed_document_id = index_->last_added_document_id();
- DocumentId first_document_to_reindex =
- (last_indexed_document_id != kInvalidDocumentId)
- ? index_->last_added_document_id() + 1
- : kMinDocumentId;
- if (first_document_to_reindex > last_stored_document_id) {
+ TruncateIndexResult truncate_result =
+ std::move(truncate_result_or).ValueOrDie();
+
+ if (truncate_result.first_document_to_reindex > last_stored_document_id) {
// Nothing to restore. Just return.
- return {libtextclassifier3::Status::OK, false};
+ return {libtextclassifier3::Status::OK, false, false};
}
+ // By using recovery_mode for IndexProcessor, we're able to replay documents
+ // from a smaller document id; it will skip documents that have already been
+ // indexed.
auto index_processor_or = IndexProcessor::Create(
- normalizer_.get(), index_.get(), integer_index_.get(), clock_.get());
+ normalizer_.get(), index_.get(), integer_index_.get(), clock_.get(),
+ /*recovery_mode=*/true);
if (!index_processor_or.ok()) {
- return {index_processor_or.status(), true};
+ return {index_processor_or.status(),
+ truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration};
}
std::unique_ptr<IndexProcessor> index_processor =
std::move(index_processor_or).ValueOrDie();
ICING_VLOG(1) << "Restoring index by replaying documents from document id "
- << first_document_to_reindex << " to document id "
- << last_stored_document_id;
+ << truncate_result.first_document_to_reindex
+ << " to document id " << last_stored_document_id;
libtextclassifier3::Status overall_status;
- for (DocumentId document_id = first_document_to_reindex;
+ for (DocumentId document_id = truncate_result.first_document_to_reindex;
document_id <= last_stored_document_id; ++document_id) {
libtextclassifier3::StatusOr<DocumentProto> document_or =
document_store_->Get(document_id);
@@ -2237,7 +2149,8 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
continue;
} else {
// Returns other errors
- return {document_or.status(), true};
+ return {document_or.status(), truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration};
}
}
DocumentProto document(std::move(document_or).ValueOrDie());
@@ -2247,7 +2160,9 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
language_segmenter_.get(),
std::move(document));
if (!tokenized_document_or.ok()) {
- return {tokenized_document_or.status(), true};
+ return {tokenized_document_or.status(),
+ truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration};
}
TokenizedDocument tokenized_document(
std::move(tokenized_document_or).ValueOrDie());
@@ -2257,15 +2172,18 @@ IcingSearchEngine::RestoreIndexIfNeeded() {
if (!status.ok()) {
if (!absl_ports::IsDataLoss(status)) {
// Real error. Stop recovering and pass it up.
- return {status, true};
+ return {status, truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration};
}
// Just a data loss. Keep trying to add the remaining docs, but report the
// data loss when we're done.
overall_status = status;
}
}
- return {overall_status, true};
+ return {overall_status, truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration};
}
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
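
The replay phase of RestoreIndexIfNeeded now starts from a single first_document_to_reindex computed across both indices and relies on the IndexProcessor's recovery mode to skip hits that a given index already holds. A stripped-down sketch of just the replay loop, with ReindexOne standing in for the fetch/tokenize/index steps:

    #include <cstdint>

    using DocumentId = int32_t;

    // Placeholder for the fetch + tokenize + index steps; a real implementation
    // would skip deleted documents and distinguish data loss from hard errors.
    bool ReindexOne(DocumentId /*document_id*/) { return true; }

    // Replays every document in
    // [first_document_to_reindex, last_stored_document_id]. In recovery mode
    // the index processor ignores hits it already has, so replaying from the
    // smaller of the two indices' resume points is safe.
    bool ReplayDocuments(DocumentId first_document_to_reindex,
                         DocumentId last_stored_document_id) {
      for (DocumentId document_id = first_document_to_reindex;
           document_id <= last_stored_document_id; ++document_id) {
        if (!ReindexOne(document_id)) {
          return false;
        }
      }
      return true;
    }

    int main() {
      return ReplayDocuments(/*first_document_to_reindex=*/4,
                             /*last_stored_document_id=*/10)
                 ? 0
                 : 1;
    }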
@@ -2291,6 +2209,67 @@ libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
return document_store_->last_added_document_id() != kInvalidDocumentId;
}
+libtextclassifier3::StatusOr<IcingSearchEngine::TruncateIndexResult>
+IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
+ // Attempt to truncate term index.
+ // TruncateTo ensures that the index does not hold any data that is not
+ // present in the ground truth. If the document store lost some documents,
+ // TruncateTo will ensure that the index does not contain any hits from those
+ // lost documents. If the index does not contain any hits for documents with
+ // document id greater than last_stored_document_id, then TruncateTo will have
+ // no effect.
+ ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
+
+ // Get last indexed document id for term index after truncating.
+ DocumentId term_index_last_added_document_id =
+ index_->last_added_document_id();
+ DocumentId first_document_to_reindex =
+ (term_index_last_added_document_id != kInvalidDocumentId)
+ ? term_index_last_added_document_id + 1
+ : kMinDocumentId;
+ bool index_needed_restoration =
+ (last_stored_document_id != term_index_last_added_document_id);
+
+ // Attempt to truncate integer index.
+ bool integer_index_needed_restoration = false;
+ DocumentId integer_index_last_added_document_id =
+ integer_index_->last_added_document_id();
+ if (integer_index_last_added_document_id == kInvalidDocumentId ||
+ last_stored_document_id > integer_index_last_added_document_id) {
+ // If last_stored_document_id is greater than
+ // integer_index_last_added_document_id, then we only have to replay docs
+ // starting from integer_index_last_added_document_id + 1. Also use std::min
+ // since we might need to replay even smaller doc ids for term index.
+ integer_index_needed_restoration = true;
+ first_document_to_reindex =
+ integer_index_last_added_document_id != kInvalidDocumentId
+ ? std::min(first_document_to_reindex,
+ integer_index_last_added_document_id + 1)
+ : kMinDocumentId;
+ } else if (last_stored_document_id < integer_index_last_added_document_id) {
+ // Clear the entire integer index if last_stored_document_id is smaller than
+ // integer_index_last_added_document_id, because there is no way to remove
+ // data with doc_id > last_stored_document_id from integer index and we have
+ // to rebuild.
+ ICING_RETURN_IF_ERROR(integer_index_->Clear());
+
+ // Since the entire integer index is discarded, we start to rebuild it by
+ // setting first_document_to_reindex to kMinDocumentId.
+ integer_index_needed_restoration = true;
+ first_document_to_reindex = kMinDocumentId;
+ }
+
+ return TruncateIndexResult(first_document_to_reindex,
+ index_needed_restoration,
+ integer_index_needed_restoration);
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearIndices() {
+ ICING_RETURN_IF_ERROR(index_->Reset());
+ ICING_RETURN_IF_ERROR(integer_index_->Clear());
+ return libtextclassifier3::Status::OK;
+}
+
ResetResultProto IcingSearchEngine::Reset() {
absl_ports::unique_lock l(&mutex_);
return ResetInternal();
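
The arithmetic in TruncateIndicesTo can be isolated from the I/O: the replay start point is the smaller of the two indices' next-unindexed document ids, unless the integer index is ahead of the document store, in which case it must be cleared and rebuilt from the beginning. A standalone sketch of just that decision (the kInvalidDocumentId and kMinDocumentId values are assumed here; the real method also performs the actual TruncateTo / Clear calls):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    using DocumentId = int32_t;
    // Values assumed for illustration.
    constexpr DocumentId kInvalidDocumentId = -1;
    constexpr DocumentId kMinDocumentId = 0;

    DocumentId FirstDocumentToReindex(DocumentId last_stored_document_id,
                                      DocumentId term_index_last_added,
                                      DocumentId integer_index_last_added,
                                      bool* clear_integer_index) {
      DocumentId first = (term_index_last_added != kInvalidDocumentId)
                             ? term_index_last_added + 1
                             : kMinDocumentId;
      *clear_integer_index = false;
      if (integer_index_last_added == kInvalidDocumentId ||
          last_stored_document_id > integer_index_last_added) {
        // Integer index is empty or behind: replay from whichever index is
        // further behind.
        first = (integer_index_last_added != kInvalidDocumentId)
                    ? std::min(first, integer_index_last_added + 1)
                    : kMinDocumentId;
      } else if (last_stored_document_id < integer_index_last_added) {
        // Integer index is ahead of the ground truth and cannot be truncated
        // in place, so it is cleared and rebuilt from the very first document.
        *clear_integer_index = true;
        first = kMinDocumentId;
      }
      return first;
    }

    int main() {
      bool clear = false;
      // Term index resumes at 8, integer index resumes at 6 -> replay from 6.
      assert(FirstDocumentToReindex(/*last_stored_document_id=*/9,
                                    /*term_index_last_added=*/7,
                                    /*integer_index_last_added=*/5,
                                    &clear) == 6 &&
             !clear);
      return 0;
    }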
@@ -2350,7 +2329,8 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions(
// Create the suggestion processor.
auto suggestion_processor_or = SuggestionProcessor::Create(
- index_.get(), language_segmenter_.get(), normalizer_.get());
+ index_.get(), integer_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get());
if (!suggestion_processor_or.ok()) {
TransformStatus(suggestion_processor_or.status(), response_status);
return response;
@@ -2358,74 +2338,9 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions(
std::unique_ptr<SuggestionProcessor> suggestion_processor =
std::move(suggestion_processor_or).ValueOrDie();
- // Populate target namespace filter.
- std::unordered_set<NamespaceId> namespace_ids;
- namespace_ids.reserve(suggestion_spec.namespace_filters_size());
- for (std::string_view name_space : suggestion_spec.namespace_filters()) {
- auto namespace_id_or = document_store_->GetNamespaceId(name_space);
- if (!namespace_id_or.ok()) {
- // The current namespace doesn't exist.
- continue;
- }
- namespace_ids.insert(namespace_id_or.ValueOrDie());
- }
- if (namespace_ids.empty() && !suggestion_spec.namespace_filters().empty()) {
- // None of desired namespace exists, we should return directly.
- response_status->set_code(StatusProto::OK);
- return response;
- }
-
- // Populate target document id filter.
- auto document_id_filter_map_or = PopulateDocumentIdFilters(
- document_store_.get(), suggestion_spec, namespace_ids);
- if (!document_id_filter_map_or.ok()) {
- TransformStatus(document_id_filter_map_or.status(), response_status);
- return response;
- }
- std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
- document_id_filter_map = document_id_filter_map_or.ValueOrDie();
- if (document_id_filter_map.empty() &&
- !suggestion_spec.document_uri_filters().empty()) {
- // None of desired DocumentId exists, we should return directly.
- response_status->set_code(StatusProto::OK);
- return response;
- }
-
- // Populate target schema type filter.
- std::unordered_set<SchemaTypeId> schema_type_ids;
- schema_type_ids.reserve(suggestion_spec.schema_type_filters_size());
- for (std::string_view schema_type : suggestion_spec.schema_type_filters()) {
- auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
- if (!schema_type_id_or.ok()) {
- continue;
- }
- schema_type_ids.insert(schema_type_id_or.ValueOrDie());
- }
- if (schema_type_ids.empty() &&
- !suggestion_spec.schema_type_filters().empty()) {
- // None of desired schema type exists, we should return directly.
- response_status->set_code(StatusProto::OK);
- return response;
- }
-
- // Populate target properties filter.
- auto property_filter_map_or = PopulatePropertyFilters(
- schema_store_.get(), suggestion_spec, schema_type_ids);
- if (!property_filter_map_or.ok()) {
- TransformStatus(property_filter_map_or.status(), response_status);
- return response;
- }
- std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map =
- property_filter_map_or.ValueOrDie();
-
// Run suggestion based on given SuggestionSpec.
- SuggestionResultCheckerImpl suggestion_result_checker_impl(
- document_store_.get(), std::move(namespace_ids),
- std::move(document_id_filter_map), std::move(schema_type_ids),
- std::move(property_filter_map));
libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or =
- suggestion_processor->QuerySuggestions(suggestion_spec,
- &suggestion_result_checker_impl);
+ suggestion_processor->QuerySuggestions(suggestion_spec);
if (!terms_or.ok()) {
TransformStatus(terms_or.status(), response_status);
return response;
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 446e081..da447d5 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -654,7 +654,8 @@ class IcingSearchEngine {
// INTERNAL_ERROR on any IO errors
struct IndexRestorationResult {
libtextclassifier3::Status status;
- bool needed_restoration;
+ bool index_needed_restoration;
+ bool integer_index_needed_restoration;
};
IndexRestorationResult RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -670,6 +671,39 @@ class IcingSearchEngine {
// INTERNAL_ERROR on I/O error
libtextclassifier3::StatusOr<bool> LostPreviousSchema()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard parts of (term, integer) indices if they contain
+ // data for document ids greater than last_stored_document_id.
+ //
+ // Returns:
+ //   On success, a TruncateIndexResult with the first document id to start
+ //   reindexing from and 2 bool flags indicating whether the term or integer
+ //   index needs restoration.
+ // INTERNAL on any I/O errors
+ struct TruncateIndexResult {
+ DocumentId first_document_to_reindex;
+ bool index_needed_restoration;
+ bool integer_index_needed_restoration;
+
+ explicit TruncateIndexResult(DocumentId first_document_to_reindex_in,
+ bool index_needed_restoration_in,
+ bool integer_index_needed_restoration_in)
+ : first_document_to_reindex(first_document_to_reindex_in),
+ index_needed_restoration(index_needed_restoration_in),
+ integer_index_needed_restoration(
+ integer_index_needed_restoration_in) {}
+ };
+ libtextclassifier3::StatusOr<TruncateIndexResult> TruncateIndicesTo(
+ DocumentId last_stored_document_id)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard the entire (term, integer) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
} // namespace lib
diff --git a/icing/icing-search-engine_backwards_compatibility_test.cc b/icing/icing-search-engine_backwards_compatibility_test.cc
index b9233cb..848c347 100644
--- a/icing/icing-search-engine_backwards_compatibility_test.cc
+++ b/icing/icing-search-engine_backwards_compatibility_test.cc
@@ -384,6 +384,184 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, MigrateToLargerScale) {
expected_document3_search));
}
+TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
+ MigrateToAppendOnlySchemaStorage) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string test_data_dir = GetTestDataDir("blob_schema_store");
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(test_data_dir.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ // No recovery is required for the document store.
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // TODO: create enum code for legacy schema store recovery after schema store
+ // change is made.
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // No recovery is required for the index.
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify that the schema stored in the index matches the one that we expect.
+ // Do not change unless you're also updating the testdata files.
+ SchemaProto expected_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedStringProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedIntegerProperty")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableIntegerProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringExactProperty")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringPrefixProperty")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = expected_schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto expected_document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto expected_document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ DocumentProto expected_document3 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("stringExactProperty", "foo")
+ .AddInt64Property("indexableIntegerProperty", 10)
+ .Build();
+
+ EXPECT_THAT(
+ icing
+ .Get(expected_document1.namespace_(), expected_document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document1));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document2.namespace_(), expected_document2.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document2));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document3.namespace_(), expected_document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document3));
+
+ // Searching for "foo" should get us document1 and not document3 due to the
+ // schema type filter.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+ search_spec.add_schema_type_filters("email");
+
+ SearchResultProto expected_document1_search;
+ expected_document1_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1_search.mutable_results()->Add()->mutable_document() =
+ expected_document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document1_search));
+
+ // Searching for "phoo" should get us document2.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document2_search;
+ expected_document2_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document2_search.mutable_results()->Add()->mutable_document() =
+ expected_document2;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document2_search));
+
+ // Searching for "foo" should get us both document1 and document3 now that
+ // schema type 'transaction' has been added to the schema filter.
+ search_spec.set_query("foo");
+ search_spec.add_schema_type_filters("transaction");
+
+ SearchResultProto expected_document_1_and_3_search;
+ expected_document_1_and_3_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document_1_and_3_search.mutable_results()
+ ->Add()
+ ->mutable_document() = expected_document3;
+ *expected_document_1_and_3_search.mutable_results()
+ ->Add()
+ ->mutable_document() = expected_document1;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document_1_and_3_search));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index f9e3ca4..2c2fbeb 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -240,33 +240,7 @@ void BM_IndexLatency(benchmark::State& state) {
}
BENCHMARK(BM_IndexLatency)
// Arguments: num_indexed_documents, num_sections
- ->ArgPair(1, 1)
- ->ArgPair(2, 1)
- ->ArgPair(8, 1)
- ->ArgPair(32, 1)
- ->ArgPair(128, 1)
- ->ArgPair(1 << 10, 1)
- ->ArgPair(1 << 13, 1)
- ->ArgPair(1 << 15, 1)
- ->ArgPair(1 << 17, 1)
- ->ArgPair(1, 5)
- ->ArgPair(2, 5)
- ->ArgPair(8, 5)
- ->ArgPair(32, 5)
- ->ArgPair(128, 5)
- ->ArgPair(1 << 10, 5)
- ->ArgPair(1 << 13, 5)
- ->ArgPair(1 << 15, 5)
- ->ArgPair(1 << 17, 5)
- ->ArgPair(1, 10)
- ->ArgPair(2, 10)
- ->ArgPair(8, 10)
- ->ArgPair(32, 10)
- ->ArgPair(128, 10)
- ->ArgPair(1 << 10, 10)
- ->ArgPair(1 << 13, 10)
- ->ArgPair(1 << 15, 10)
- ->ArgPair(1 << 17, 10);
+ ->ArgPair(1000000, 5);
void BM_QueryLatency(benchmark::State& state) {
// Initialize the filesystem
@@ -303,7 +277,7 @@ void BM_QueryLatency(benchmark::State& state) {
SearchSpecProto search_spec = CreateSearchSpec(
language.at(0), std::vector<std::string>(), TermMatchType::PREFIX);
- ResultSpecProto result_spec = CreateResultSpec(1000000, 1000000, 1000000);
+ ResultSpecProto result_spec = CreateResultSpec(1, 1000000, 1000000);
ScoringSpecProto scoring_spec =
CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
for (auto _ : state) {
@@ -313,10 +287,7 @@ void BM_QueryLatency(benchmark::State& state) {
}
BENCHMARK(BM_QueryLatency)
// Arguments: num_indexed_documents, num_sections
- ->ArgPair(32, 2)
- ->ArgPair(128, 2)
- ->ArgPair(1 << 10, 2)
- ->ArgPair(1 << 13, 2);
+ ->ArgPair(1000000, 2);
void BM_IndexThroughput(benchmark::State& state) {
// Initialize the filesystem
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index 6ff21fb..f51abdf 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/icing-search-engine.h"
-
#include <cstdint>
#include <limits>
#include <memory>
@@ -26,6 +24,9 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/index/index.h"
+#include "icing/index/numeric/integer-index.h"
#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/endian.h"
@@ -46,8 +47,10 @@
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -63,8 +66,11 @@ namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
+using ::testing::DoDefault;
+using ::testing::EndsWith;
using ::testing::Eq;
using ::testing::HasSubstr;
+using ::testing::Matcher;
using ::testing::Return;
using ::testing::SizeIs;
@@ -152,6 +158,10 @@ std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
+std::string GetIntegerIndexDir() {
+ return GetTestBaseDir() + "/integer_index_dir";
+}
+
std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
std::string GetHeaderFilename() {
@@ -169,6 +179,7 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
.SetKey(std::move(name_space), std::move(uri))
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
}
@@ -186,39 +197,92 @@ DocumentProto CreateEmailDocument(const std::string& name_space,
.Build();
}
-SchemaProto CreateMessageSchema() {
- return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
+SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
.Build();
}
-SchemaProto CreateEmailSchema() {
- return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
+SchemaTypeConfigProto CreateEmailSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
.Build();
}
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build();
+}
+
ScoringSpecProto GetDefaultScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
return scoring_spec;
}
+TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ SchemaProto email_schema = CreateMessageSchema();
+ EXPECT_THAT(icing.SetSchema(email_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ DocumentProto doc = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing.Put(doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(doc.namespace_(), doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
+ .status()
+ .code(),
+ Eq(StatusProto::FAILED_PRECONDITION));
+
+ SearchSpecProto search_spec = SearchSpecProto::default_instance();
+ ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ constexpr int kSomePageToken = 12;
+ EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
+
+ EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Optimize().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+}
+
TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -465,16 +529,9 @@ TEST_F(IcingSearchEngineInitializationTest,
auto mock_filesystem = std::make_unique<MockFilesystem>();
std::string document_log_filepath =
icing_options.base_dir() + "/document_dir/document_log_v1";
- auto get_filesize_lambda = [this,
- &document_log_filepath](const char* filename) {
- if (strncmp(document_log_filepath.c_str(), filename,
- document_log_filepath.length()) == 0) {
- return Filesystem::kBadFileSize;
- }
- return this->filesystem()->GetFileSize(filename);
- };
- ON_CALL(*mock_filesystem, GetFileSize(A<const char*>()))
- .WillByDefault(get_filesize_lambda);
+ ON_CALL(*mock_filesystem,
+ GetFileSize(Matcher<const char*>(Eq(document_log_filepath))))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
@@ -581,13 +638,26 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) {
icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
EqualsProto(expected_get_result_proto));
- // Checks that the index is still ok so we can search over it
+ // Checks that the term index is still ok so we can search over it
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
+ // Checks that the integer index is still ok so we can search over it
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
// Checks that Schema is still there since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
ProtoIsOk());
@@ -663,6 +733,7 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetSchema("Message")
.AddStringProperty("additional", "content")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
@@ -672,23 +743,16 @@ TEST_F(IcingSearchEngineInitializationTest,
IcingSearchEngine icing(options, GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ // Add non-indexable property "additional"
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("additional")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -718,30 +782,18 @@ TEST_F(IcingSearchEngineInitializationTest,
auto type = new_schema.add_types();
type->set_schema_type("Email");
- type = new_schema.add_types();
- type->set_schema_type("Message");
-
- // Adding a new property changes the SectionIds (since SectionIds are
- // assigned based on alphabetical order of indexed sections, marking
- // "additional" as an indexed property will push the "body" property to a
- // different SectionId)
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ // Switching a non-indexable property to indexable changes the SectionIds
+ // (since SectionIds are assigned based on alphabetical order of indexed
+ // sections, marking "additional" as an indexed property will push the
+ // "body" and "indexableInteger" property to different SectionIds)
+ *new_schema.add_types() =
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("additional")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
// Write the marker file
std::string marker_filepath =
@@ -777,37 +829,75 @@ TEST_F(IcingSearchEngineInitializationTest,
GetResultSpecProto::default_instance()),
EqualsProto(expected_get_result_proto));
- SearchSpecProto search_spec;
+ // Verify term search
+ SearchSpecProto search_spec1;
// The section restrict will ensure we are using the correct, updated
// SectionId in the Index
- search_spec.set_query("additional:content");
+ search_spec1.set_query("additional:content");
// Schema type filter will ensure we're using the correct, updated
// SchemaTypeId in the DocumentStore
- search_spec.add_schema_type_filters("Message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec1.add_schema_type_filters("Message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ SearchResultProto expected_search_result_proto1;
+ expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
document2_with_additional_property;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto1));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.add_schema_type_filters("Message");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto2;
+ expected_search_result_proto2.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
+ document2_with_additional_property;
+ *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto2));
}
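As context for the SectionId comment in the test above: section IDs are assigned by the alphabetical order of indexed property names, so marking "additional" as indexed shifts the other sections. A minimal standalone sketch (illustrative only, not part of this change) of that ordering:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Prints the SectionId each indexed property would get, assuming IDs are
// assigned in alphabetical order of the indexed property names.
void PrintSectionIds(std::vector<std::string> indexed_properties) {
  std::sort(indexed_properties.begin(), indexed_properties.end());
  for (std::size_t section_id = 0; section_id < indexed_properties.size();
       ++section_id) {
    std::cout << "  " << indexed_properties[section_id] << " -> SectionId "
              << section_id << "\n";
  }
}

int main() {
  std::cout << "Before (only \"body\" and \"indexableInteger\" indexed):\n";
  PrintSectionIds({"body", "indexableInteger"});
  // body -> 0, indexableInteger -> 1
  std::cout << "After (\"additional\" becomes indexed too):\n";
  PrintSectionIds({"additional", "body", "indexableInteger"});
  // additional -> 0, body -> 1, indexableInteger -> 2
  return 0;
}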
TEST_F(IcingSearchEngineInitializationTest,
RecoverFromInconsistentDocumentStore) {
+ // Test the following scenario: document store is ahead of term and integer
+ // index. IcingSearchEngine should be able to recover both indices. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - Still, we need to replay and reindex documents.
+
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
{
// Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -831,10 +921,38 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_EXPECT_OK(document_store->Put(document2));
}
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
// Index Restoration should be triggered here and document2 should be
// indexed.
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -850,27 +968,52 @@ TEST_F(IcingSearchEngineInitializationTest,
icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
EqualsProto(expected_get_result_proto));
- // We indexed the additional document
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
-
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ // We indexed the additional document in all indices.
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
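The numeric-search setup above (advanced query syntax plus the numeric search feature) recurs in most of the following tests. A small sketch of how it could be factored into a helper; the helper name is hypothetical, while the SearchSpecProto calls and the kNumericSearchFeature constant are the ones already used in these tests:

// Hypothetical helper (not part of this change) capturing the repeated
// numeric-search SearchSpecProto setup.
SearchSpecProto MakeNumericSearchSpec(const std::string& query) {
  SearchSpecProto search_spec;
  search_spec.set_query(query);
  search_spec.set_search_type(
      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
  search_spec.add_enabled_features(std::string(kNumericSearchFeature));
  return search_spec;
}

// Usage sketch:
//   SearchResultProto results =
//       icing.Search(MakeNumericSearchSpec("indexableInteger == 123"),
//                    ScoringSpecProto::default_instance(),
//                    ResultSpecProto::default_instance());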
-TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentIndex) {
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
+ // Test the following scenario: term index is corrupted (e.g. checksum doesn't
+ // match). IcingSearchEngine should be able to recover term index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Should discard the entire term index directory and start it from
+ // scratch.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect since we start it
+ // from scratch.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
SearchSpecProto search_spec;
search_spec.set_query("message");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
@@ -882,7 +1025,10 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentIndex) {
{
// Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -894,12 +1040,44 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentIndex) {
expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
- // Pretend we lost the entire index
- EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively(
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str()));
+ // Manually corrupt term index
+ {
+ const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
+ ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Mock filesystem to observe and check the behavior of term index and integer
+ // index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should be discarded once.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(1);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
// Check that our index is ok by searching over the restored index
SearchResultProto search_result_proto =
@@ -909,10 +1087,24 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentIndex) {
expected_search_result_proto));
}
-TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
+ // Test the following scenario: integer index is corrupted (e.g. checksum
+ // doesn't match). IcingSearchEngine should be able to recover integer index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Should discard the entire integer index directory and start it from
+ // scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded, since we start it from scratch.
SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -921,7 +1113,10 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
{
// Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -933,14 +1128,46 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
- // Pretend index is corrupted
- const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
- ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ // Manually corrupt integer index
+ {
+ const std::string integer_index_metadata_file =
+ GetIntegerIndexDir() + "/integer_index.m";
+ ScopedFd fd(
+ filesystem()->OpenForWrite(integer_index_metadata_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Mock filesystem to observe and check the behavior of term index and integer
+ // index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should be discarded once, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded) since we start it from scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
// Check that our index is ok by searching over the restored index
SearchResultProto search_result_proto =
@@ -950,115 +1177,249 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
expected_search_result_proto));
}
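Both recovery tests above assert, through the mocked filesystem, that the integer index is neither dropped wholesale nor cleared. A sketch of those two expectations wrapped in a helper (the helper name is hypothetical; MockFilesystem, DeleteDirectoryRecursively, and the gMock matchers are the ones already used here):

// Hypothetical helper (not part of this change) bundling the recurring
// "integer index must stay intact" expectations.
void ExpectIntegerIndexNeverDiscarded(MockFilesystem* mock_filesystem) {
  // The integer index directory itself must never be thrown away...
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
      .Times(0);
  // ...and Clear() must never drop any of its storage sub directories.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
      .Times(0);
}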
-TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
+ // Test the following scenario: losing the entire term index directory.
+ // IcingSearchEngine should be able to recover term index. Several additional
+ // behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should not be discarded since we've already lost
+ // it. Start it from scratch.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect since we start it
+ // from scratch.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ // 1. Create an index with 3 documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
- SchemaProto email_schema = CreateMessageSchema();
- EXPECT_THAT(icing.SetSchema(email_schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchema().status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- DocumentProto doc = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(doc).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing
- .Get(doc.namespace_(), doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
- .status()
- .code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
- SearchSpecProto search_spec = SearchSpecProto::default_instance();
- ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
- ResultSpecProto result_spec = ResultSpecProto::default_instance();
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- constexpr int kSomePageToken = 12;
- EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
+ // 2. Delete the term index directory to trigger RestoreIndexIfNeeded.
+ std::string idx_dir = GetIndexDir();
+ filesystem()->DeleteDirectoryRecursively(idx_dir.c_str());
- EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Optimize().status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded since we've already
+ // lost it.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ }
}
-TEST_F(IcingSearchEngineInitializationTest, RestoreIndex) {
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
+ // Test the following scenario: losing the entire integer index directory.
+ // IcingSearchEngine should be able to recover integer index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should not be discarded since we've already
+ // lost it. Start it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded, since we start it from scratch.
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
.Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
+ // 1. Create an index with 3 documents.
{
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- // Add two documents. These should get merged into the main index.
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
document = DocumentBuilder(document).SetUri("fake_type/1").Build();
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
document = DocumentBuilder(document).SetUri("fake_type/2").Build();
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // 2. Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+ // 2. Delete the integer index file to trigger RestoreIndexIfNeeded.
+ std::string integer_index_dir = GetIntegerIndexDir();
+ filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
// 3. Create the index again. This should trigger index restoration.
{
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded since we've
+ // already lost it, and Clear() should never be called (i.e. storage sub
+ // directory "*/integer_index_dir/*" should never be discarded) since we
+ // start it from scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
// All documents should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(3));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0"));
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
}
}
-TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseLiteIndex) {
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateLiteIndexWithoutReindexing) {
+ // Test the following scenario: term lite index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite index. This should be sufficient to make term index
+ // consistent with document store, so reindexing should not take place.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
.Build();
// 1. Create an index with a LiteIndex that will only allow one document
// before needing a merge.
{
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
@@ -1067,72 +1428,154 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseLiteIndex) {
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
document = DocumentBuilder(document).SetUri("fake_type/1").Build();
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // 2. Delete the last document from the document log
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id, but don't merge into the main index. This makes
+ // the term index's last_added_document_id run ahead of the document store.
+ // - Document store: [0, 1]
+ // - Term index
+ // - Main index: [0, 1]
+ // - Lite index: [2]
+ // - Integer index: [0, 1]
{
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- filesystem()->DeleteFile(document_log_file.c_str());
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
- auto create_result,
- PortableFileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
- std::move(create_result.proto_log);
-
- document = DocumentBuilder(document).SetUri("fake_type/0").Build();
- DocumentWrapper wrapper;
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
-
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/document.ByteSizeLong()),
+ &filesystem, &icing_filesystem));
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
}
- // 3. Create the index again. This should throw out the lite index and trigger
- // index restoration which will only restore the two documents in the main
- // index.
+ // 3. Create the index again.
{
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded, since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Since truncating lite index is sufficient to make term index consistent
+ // with the document store, replaying documents or reindexing shouldn't take place.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
// Only the documents that were in the main index should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0"));
+ ASSERT_THAT(results1.results(), SizeIs(2));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/0"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(2));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/0"));
+ }
+
+ // 4. Since document 2 doesn't exist, testing query = "foo" is not enough to
+ // verify the correctness of term index restoration. Instead, we have to check
+ // hits for "foo" should not be found in term index.
+ {
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/document.ByteSizeLong()),
+ &filesystem, &icing_filesystem));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
}
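Step 4 above validates the truncation by opening the term index directly and checking that the dangling term produces no hits. A sketch of that check as a reusable helper (the helper name is hypothetical; the Index::GetIterator call and the RESOURCE_EXHAUSTED expectation are taken from step 4):

// Hypothetical helper (not part of this change) wrapping the step-4 check
// that a term has no remaining hits in the term index.
void ExpectTermHasNoHits(Index* index, const std::string& term) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
      index->GetIterator(term, /*term_start_index=*/0,
                         /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                         TermMatchType::EXACT_ONLY));
  // An iterator that is exhausted on the first Advance() has no hits at all.
  EXPECT_THAT(doc_hit_info_iter->Advance(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}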
-TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIndex) {
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateLiteIndexWithReindexing) {
+ // Test the following scenario: term lite index is *partially* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite index. However, some valid data in term lite index were
+ // discarded together, so reindexing should still take place to recover
+ // them after truncating.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
.Build();
// 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
+ // before needing a merge.
{
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
@@ -1146,46 +1589,668 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIndex) {
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // 2. Delete the last two documents from the document log.
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id, but don't merge into the main index. This makes
+ // the term index's last_added_document_id run ahead of the document store.
+ // - Document store: [0, 1, 2]
+ // - Term index
+ // - Main index: [0, 1]
+ // - Lite index: [2, 3]
+ // - Integer index: [0, 1, 2]
{
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- filesystem()->DeleteFile(document_log_file.c_str());
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
- auto create_result,
- PortableFileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
- std::move(create_result.proto_log);
-
- document = DocumentBuilder(document).SetUri("fake_type/0").Build();
- DocumentWrapper wrapper;
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/document.ByteSizeLong()),
+ &filesystem, &icing_filesystem));
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
}
- // 3. Create the index again. This should throw out the lite and main index
- // and trigger index restoration.
+ // 3. Create the index again.
{
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded, since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Truncating the lite index not only deletes data ahead of the document store, but
+ // also deletes valid data. Therefore, we still have to replay documents and
+ // reindex.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable after reindexing.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ }
+
+ // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to
+ // verify the correctness of term index restoration. Instead, we have to check
+ // hits for "foo" should not be found in term index.
+ {
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/document.ByteSizeLong()),
+ &filesystem, &icing_filesystem));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateMainIndexWithoutReindexing) {
+ // Test the following scenario: term main index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite and main index. This should be sufficient to make term
+ // index consistent with document store (in this case, document store is
+ // empty as well), so reindexing should not take place.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id. Merge some of them into the main index and keep
+ // others in the lite index. This makes the term index's document ids run
+ // ahead of the (empty) document store.
+ // - Document store: []
+ // - Term index
+ // - Main index: [0]
+ // - Lite index: [1]
+ // - Integer index: []
+ {
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ // index merge size is not important here because we will manually
+ // invoke merge below.
+ Index::Options(GetIndexDir(), /*index_merge_size=*/100),
+ &filesystem, &icing_filesystem));
+ // Add hits for document 0 and merge.
+ ASSERT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+ index->set_last_added_document_id(0);
+ Index::Editor editor =
+ index->Edit(/*document_id=*/0, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ ICING_ASSERT_OK(index->Merge());
+
+ // Add hits for document 1 and don't merge.
+ index->set_last_added_document_id(1);
+ editor = index->Edit(/*document_id=*/1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index.
+ {
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded, since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Since truncating main index is sufficient to make term index consistent
+ // with document store, replaying documents or reindexing shouldn't take
+ // place.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+ // 4. Since documents 0 and 1 don't exist, testing the queries "foo" and "bar"
+ // is not enough to verify the correctness of term index restoration. Instead,
+ // we have to check that hits for "foo" and "bar" are not found in the term index.
+ {
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100),
+ &filesystem, &icing_filesystem));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_iter,
+ index->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateMainIndexWithReindexing) {
+ // Test the following scenario: term main index is *partially* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite and main index. However, some valid data in term main index
+ // were discarded together, so reindexing should still take place to
+ // recover them after truncating.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ // 1. Create an index with 3 documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into term lite index and increment
+ // last_added_document_id. Merge some of them into the main index and keep
+ // others in the lite index. This makes the term index's document ids run
+ // ahead of the document store.
+ // - Document store: [0, 1, 2]
+ // - Term index
+ // - Main index: [0, 1, 2, 3]
+ // - Lite index: [4]
+ // - Integer index: [0, 1, 2]
+ {
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/document.ByteSizeLong()),
+ &filesystem, &icing_filesystem));
+ // Add hits for document 3 and merge.
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ ICING_ASSERT_OK(index->Merge());
+
+ // Add hits for document 4 and don't merge.
+ index->set_last_added_document_id(original_last_added_doc_id + 2);
+ editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index
+ // and trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded, since we only call
+ // TruncateTo for term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Truncating the main index not only deletes data ahead of the document store, but
+ // also deletes valid data. Therefore, we still have to replay documents and
+ // reindex.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
-  // Only the first document should be retrievable.
+  // All documents should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0"));
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ }
+
+  // 4. Since documents 3 and 4 don't exist, searching for "foo" and "bar" is
+  //    not enough to verify the correctness of term index restoration.
+  //    Instead, we have to check that no hits for "foo" or "bar" remain in the
+  //    term index.
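+  //    (An iterator whose very first Advance() returns RESOURCE_EXHAUSTED is
+  //    treated below as evidence that the term has no hits at all.)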
+ {
+ Filesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100),
+ &filesystem, &icing_filesystem));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_iter,
+ index->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateIntegerIndexWithoutReindexing) {
+ // Test the following scenario: integer index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover integer index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index and throw out all integer
+ // index storages, i.e. all storage sub directories (path_expr =
+ // "*/integer_index_dir/*") should be discarded. This should be sufficient
+ // to make integer index consistent with document store (in this case,
+ // document store is empty as well), so reindexing should not take place.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+  // 2. Manually add some data into the integer index and increment
+  //    last_added_document_id. This will make the integer index's last added
+  //    document id inconsistent with the document store.
+ // - Document store: []
+ // - Term index: []
+ // - Integer index: [0]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
+ // Add hits for document 0.
+ ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId);
+ integer_index->set_last_added_document_id(0);
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
+ /*property_path=*/"indexableInteger", /*document_id=*/0,
+ /*section_id=*/0);
+ ICING_ASSERT_OK(editor->BufferKey(123));
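+    // Note: IndexAllBufferedKeys() is invoked on std::move(*editor), which
+    // suggests it consumes the editor; the editor is not reused afterwards.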
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+    // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+    // Ensure the integer index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate integer index and thus storage sub
+ // directory (path_expr = "*/integer_index_dir/*") should be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(1);
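+    // (With only the "indexableInteger" property indexed so far, clearing the
+    // integer index is expected to remove exactly one storage sub-directory.)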
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Since truncating integer index is sufficient to make it consistent with
+ // document store, replaying documents or reindexing shouldn't take place.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+  // 4. Since document 0 doesn't exist, the numeric query
+  //    "indexableInteger == 123" is not enough to verify the correctness of
+  //    integer index restoration. Instead, we have to check that no hits for
+  //    123 remain in the integer index.
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ integer_index->GetIterator(/*property_path=*/"indexableInteger",
+ /*key_lower=*/123, /*key_upper=*/123));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateIntegerIndexWithReindexing) {
+ // Test the following scenario: integer index is *partially* ahead of document
+ // store. IcingSearchEngine should be able to recover integer index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index and throw out all integer
+ // index storages, i.e. all storage sub directories (path_expr =
+ // "*/integer_index_dir/*") should be discarded. However, some valid data
+ // in integer index were discarded together, so reindexing should still
+ // take place to recover them after clearing.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ // 1. Create an index with 3 documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+  // 2. Manually add some data into the integer index and increment
+  //    last_added_document_id. This will make the integer index's last added
+  //    document id inconsistent with the document store.
+ // - Document store: [0, 1, 2]
+ // - Term index: [0, 1, 2]
+ // - Integer index: [0, 1, 2, 3]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
+ // Add hits for document 3.
+ DocumentId original_last_added_doc_id =
+ integer_index->last_added_document_id();
+ integer_index->set_last_added_document_id(original_last_added_doc_id + 1);
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
+ /*property_path=*/"indexableInteger",
+ /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0);
+ ICING_ASSERT_OK(editor->BufferKey(456));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of term index and
+ // integer index.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+    // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+    // Ensure the integer index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate integer index and thus storage sub
+ // directory (path_expr = "*/integer_index_dir/*") should be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(1);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("fake_type/0"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("fake_type/0"));
+ }
+
+  // 4. Since document 3 doesn't exist, the numeric query
+  //    "indexableInteger == 456" is not enough to verify the correctness of
+  //    integer index restoration. Instead, we have to check that no hits for
+  //    456 remain in the integer index.
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ integer_index->GetIterator(/*property_path=*/"indexableInteger",
+ /*key_lower=*/456, /*key_upper=*/456));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
}
@@ -1200,21 +2265,29 @@ TEST_F(IcingSearchEngineInitializationTest,
// Set a schema for a single type that has no indexed properties.
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("unindexedField")
- .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
- .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedField")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
.Build();
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- // Add a document that contains no indexed content.
+ // Add a document that contains no indexed properties.
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("unindexedField",
"Don't you dare search over this!")
+ .AddInt64Property("unindexedInteger", -123)
.Build();
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
}
@@ -1232,6 +2305,9 @@ TEST_F(IcingSearchEngineInitializationTest,
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ init_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
}
}
@@ -1243,10 +2319,27 @@ TEST_F(IcingSearchEngineInitializationTest,
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- // Set a schema for a single type that has no indexed properties.
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+  // Set a schema for a single type with indexed string and integer properties.
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- // Add a document that contains no valid indexed content - just punctuation.
+ // Add a document that contains:
+ // - No valid indexed string content - just punctuation
+ // - No integer content - since it is an optional property
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
@@ -1268,6 +2361,9 @@ TEST_F(IcingSearchEngineInitializationTest,
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ init_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
}
}
@@ -1290,11 +2386,13 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetKey("icing", "fake_type/1")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
DocumentProto document2 = DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 456)
.Build();
{
@@ -1353,6 +2451,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(0));
@@ -1370,6 +2471,7 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
{
@@ -1413,10 +2515,16 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result_proto.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::PARTIAL_LOSS));
+  // Since the document store rewinds to the previous checkpoint, the last
+  // stored doc id will be consistent with the last added document ids in the
+  // term/integer indices, so there will be no index restoration.
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -1434,6 +2542,7 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetKey("icing", "fake_type/1")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
const std::string document_log_file = absl_ports::StrCat(
@@ -1504,6 +2613,9 @@ TEST_F(IcingSearchEngineInitializationTest,
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -1516,11 +2628,12 @@ TEST_F(IcingSearchEngineInitializationTest,
}
TEST_F(IcingSearchEngineInitializationTest,
- InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) {
+ InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
{
// Initialize and put a document.
@@ -1551,6 +2664,70 @@ TEST_F(IcingSearchEngineInitializationTest,
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(
+ IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the integer index file to trigger RestoreIndexIfNeeded.
+ std::string integer_index_dir = GetIntegerIndexDir();
+ filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
+ }
+
+ {
+    // Integer index is empty but ground truth is not. The integer index
+    // should be restored due to the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -1572,11 +2749,12 @@ TEST_F(IcingSearchEngineInitializationTest,
}
TEST_F(IcingSearchEngineInitializationTest,
- InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) {
+ InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
IcingSearchEngineOptions options = GetDefaultIcingOptions();
{
@@ -1592,13 +2770,7 @@ TEST_F(IcingSearchEngineInitializationTest,
SchemaProto new_schema =
SchemaBuilder()
.AddType(
- SchemaTypeConfigBuilder()
- .SetType("Message")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
.AddProperty(PropertyConfigBuilder()
.SetName("subject")
.SetDataTypeString(TERM_MATCH_PREFIX,
@@ -1633,6 +2805,9 @@ TEST_F(IcingSearchEngineInitializationTest,
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -1666,6 +2841,9 @@ TEST_F(IcingSearchEngineInitializationTest,
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
@@ -1692,6 +2870,7 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
{
// Initialize and put one document.
@@ -1701,23 +2880,16 @@ TEST_F(IcingSearchEngineInitializationTest,
ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // lambda to fail OpenForWrite on lite index hit buffer once.
- bool has_failed_already = false;
- auto open_write_lambda = [this, &has_failed_already](const char* filename) {
- std::string lite_index_buffer_file_path =
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
- std::string filename_string(filename);
- if (!has_failed_already && filename_string == lite_index_buffer_file_path) {
- has_failed_already = true;
- return -1;
- }
- return this->filesystem()->OpenForWrite(filename);
- };
-
+ std::string lite_index_buffer_file_path =
+ absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(_))
+ .WillRepeatedly(DoDefault());
// This fails Index::Create() once.
- ON_CALL(*mock_icing_filesystem, OpenForWrite)
- .WillByDefault(open_write_lambda);
+ EXPECT_CALL(*mock_icing_filesystem,
+ OpenForWrite(Eq(lite_index_buffer_file_path)))
+ .WillOnce(Return(-1))
+ .WillRepeatedly(DoDefault());
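+  // Later opens fall through to DoDefault() (presumably delegating to the real
+  // filesystem), so only the first Index::Create() attempt fails and
+  // initialization can still recover.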
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(10);
@@ -1731,6 +2903,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(10));
@@ -1752,11 +2927,12 @@ TEST_F(IcingSearchEngineInitializationTest,
}
TEST_F(IcingSearchEngineInitializationTest,
- InitializeShouldLogRecoveryCauseDocStoreIOError) {
+ InitializeShouldLogRecoveryCauseIntegerIndexIOError) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.Build();
{
// Initialize and put one document.
@@ -1766,25 +2942,77 @@ TEST_F(IcingSearchEngineInitializationTest,
ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // lambda to fail Read on document store header once.
- bool has_failed_already = false;
- auto read_lambda = [this, &has_failed_already](const char* filename,
- void* buf, size_t buf_size) {
- std::string document_store_header_file_path =
- absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
- std::string filename_string(filename);
- if (!has_failed_already &&
- filename_string == document_store_header_file_path) {
- has_failed_already = true;
- return false;
- }
- return this->filesystem()->Read(filename, buf, buf_size);
- };
+ std::string integer_index_metadata_file =
+ absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, OpenForWrite(_)).WillRepeatedly(DoDefault());
+ // This fails IntegerIndex::Create() once.
+ EXPECT_CALL(*mock_filesystem, OpenForWrite(Eq(integer_index_metadata_file)))
+ .WillOnce(Return(-1))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseDocStoreIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string document_store_header_file_path =
+ absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
+ .WillRepeatedly(DoDefault());
// This fails DocumentStore::InitializeDerivedFiles() once.
- ON_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
- .WillByDefault(read_lambda);
+ EXPECT_CALL(
+ *mock_filesystem,
+ Read(Matcher<const char*>(Eq(document_store_header_file_path)), _, _))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
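+  // (The explicit Matcher<const char*> selects the filename-based Read()
+  // overload; Read() presumably has another overload taking a file descriptor.)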
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(10);
@@ -1807,6 +3035,9 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
Eq(0));
@@ -1861,6 +3092,9 @@ TEST_F(IcingSearchEngineInitializationTest,
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
}
@@ -1891,17 +3125,7 @@ TEST_F(IcingSearchEngineInitializationTest,
// Create and set a schema with two type configs: Email and Message.
SchemaProto schema = CreateEmailSchema();
-
- auto type = schema.add_types();
- type->set_schema_type("Message");
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ *schema.add_types() = CreateMessageSchemaTypeConfig();
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
}
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
index da02c4a..b2c7a62 100644
--- a/icing/icing-search-engine_optimize_test.cc
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/icing-search-engine.h"
+#include <unistd.h>
#include <cstdint>
#include <limits>
@@ -26,6 +26,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
@@ -45,6 +46,7 @@
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
@@ -126,17 +128,24 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
.SetKey(std::move(name_space), std::move(uri))
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
}
SchemaProto CreateMessageSchema() {
return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
.Build();
}
@@ -267,6 +276,7 @@ TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
.SetKey("namespace", "uri2")
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 456)
.SetCreationTimestampMs(100)
.SetTtlMs(500)
.Build();
@@ -408,6 +418,15 @@ TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) {
TEST_F(IcingSearchEngineOptimizeTest,
GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
DocumentProto empty_document1 =
DocumentBuilder()
.SetKey("namespace", "uri1")
@@ -434,7 +453,7 @@ TEST_F(IcingSearchEngineOptimizeTest,
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(std::move(schema)).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk());
@@ -626,9 +645,17 @@ TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) {
TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
DocumentProto document = CreateMessageDocument("namespace", "uri");
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+ search_spec1.set_query("m");
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
@@ -642,20 +669,37 @@ TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that Search() works right after Optimize()
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+  SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+
+ // Verify term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+  SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineOptimizeTest,
@@ -698,10 +742,6 @@ TEST_F(IcingSearchEngineOptimizeTest,
EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
@@ -709,11 +749,29 @@ TEST_F(IcingSearchEngineOptimizeTest,
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+  SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineOptimizeTest,
@@ -760,36 +818,57 @@ TEST_F(IcingSearchEngineOptimizeTest,
.SetKey("namespace", "uri2")
.SetSchema("Message")
.AddStringProperty("body", "new body")
+ .AddInt64Property("indexableInteger", 456)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
// Searching old content returns nothing because original file directory is
// missing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
- search_spec.set_query("n");
+ // Numeric (integer) search
+  SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ // Searching new content returns the new document
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
new_document;
-
- // Searching new content returns the new document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ // Term search
+ search_spec1.set_query("n");
+ search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ search_spec2.set_query("indexableInteger == 456");
+  search_result_proto2 = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineOptimizeTest,
@@ -838,35 +917,56 @@ TEST_F(IcingSearchEngineOptimizeTest,
.SetKey("namespace", "uri2")
.SetSchema("Message")
.AddStringProperty("body", "new body")
+ .AddInt64Property("indexableInteger", 456)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
// Searching old content returns nothing because original files are missing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
- search_spec.set_query("n");
+ // Numeric (integer) search
+  SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ // Searching new content returns the new document
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
new_document;
-
- // Searching new content returns the new document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
+ // Term search
+ search_spec1.set_query("n");
+ search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ search_spec2.set_query("indexableInteger == 456");
+  search_result_proto2 = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
index b369d40..59d25e5 100644
--- a/icing/icing-search-engine_schema_test.cc
+++ b/icing/icing-search-engine_schema_test.cc
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/icing-search-engine.h"
-
#include <cstdint>
#include <limits>
#include <memory>
@@ -26,6 +24,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
@@ -45,6 +44,7 @@
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -124,20 +124,29 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
.SetKey(std::move(name_space), std::move(uri))
.SetSchema("Message")
.AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
}
-SchemaProto CreateMessageSchema() {
- return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
+SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
.Build();
}
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
+}
+
ScoringSpecProto GetDefaultScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -612,12 +621,7 @@ TEST_F(IcingSearchEngineSchemaTest, SetSchema) {
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
SchemaProto schema_with_email_and_message = schema_with_email;
- type = schema_with_email_and_message.add_types();
- type->set_schema_type("Message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ *schema_with_email_and_message.add_types() = CreateMessageSchemaTypeConfig();
// Create an arbitrary invalid schema
SchemaProto invalid_schema;
@@ -663,58 +667,217 @@ TEST_F(IcingSearchEngineSchemaTest, SetSchema) {
}
TEST_F(IcingSearchEngineSchemaTest,
- SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) {
+ SetSchemaNewIndexedStringPropertyTriggersIndexRestorationAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
- schema_with_no_indexed_property.mutable_types(0)
- ->mutable_properties(0)
- ->clear_string_indexing_config();
+ // Create a schema with 2 properties:
+ // - 'a': string type, unindexed. No section id assigned.
+ // - 'b': int64 type, indexed. Section id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Schema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(schema_with_no_indexed_property);
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
// Ignore latency numbers. They're covered elsewhere.
set_schema_result.clear_latency_ms();
SetSchemaResultProto expected_set_schema_result;
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Message");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Schema");
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
- // Nothing will be index and Search() won't return anything.
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Schema")
+ .AddStringProperty("a", "message body")
+ .AddInt64Property("b", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ // Only 'b' will be indexed.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
+ // Verify term search: won't get anything.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("a:message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(empty_result));
- SchemaProto schema_with_indexed_property = CreateMessageSchema();
+ // Verify numeric (integer) search: will get document.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("b == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Change the schema to:
+ // - 'a': string type, indexed. Section id = 0.
+ // - 'b': int64 type, indexed. Section id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_string_indexing_config()
+ ->set_term_match_type(TERM_MATCH_PREFIX);
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_string_indexing_config()
+ ->set_tokenizer_type(TOKENIZER_PLAIN);
// Index restoration should be triggered here because new schema requires more
- // properties to be indexed.
- set_schema_result = icing.SetSchema(schema_with_indexed_property);
+ // properties to be indexed. Also new section ids will be reassigned and index
+ // restoration should use new section ids to rebuild.
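+  // (Judging from the section ids noted above, indexed properties appear to be
+  // assigned section ids in lexicographic order of property path, which is why
+  // 'a' now takes section id 0 and pushes 'b' to section id 1.)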
+ set_schema_result = icing.SetSchema(schema_two);
// Ignore latency numbers. They're covered elsewhere.
set_schema_result.clear_latency_ms();
expected_set_schema_result = SetSchemaResultProto();
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Message");
+ ->Add("Schema");
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+ // Verify term search: will get document now.
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: will still get document.
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedIntegerPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 2 properties:
+ // - 'a': int64 type, unindexed. No section id assigned.
+ // - 'b': string type, indexed. Section id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Schema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+          .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Schema")
+ .AddInt64Property("a", 123)
+ .AddStringProperty("b", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ // Only 'b' will be indexed.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search: will get document.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("b:message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: won't get anything.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("a == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Change the schema to:
+ // - 'a': int64 type, indexed. Section id = 0.
+ // - 'b': string type, indexed. Section id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_integer_indexing_config()
+ ->set_numeric_match_type(NUMERIC_MATCH_RANGE);
+ // Index restoration should be triggered here because the new schema requires
+ // more properties to be indexed. Also, new section ids will be reassigned, and
+ // index restoration should use the new section ids to rebuild.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search: will still get document.
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: will get document now.
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -733,7 +896,16 @@ TEST_F(IcingSearchEngineSchemaTest,
.SetName("name")
.SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
.Build();
+ // Create a schema with nested properties:
+ // - "sender.age": int64 type, (nested) indexed. Section id = 0.
+ // - "sender.name": string type, (nested) indexed. Section id = 1.
+ // - "subject": string type, indexed. Section id = 2.
+ // - "timestamp": int64 type, indexed. Section id = 3.
SchemaProto nested_schema =
SchemaBuilder()
.AddType(person_proto)
@@ -749,6 +921,10 @@ TEST_F(IcingSearchEngineSchemaTest,
.SetName("subject")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
@@ -773,39 +949,67 @@ TEST_F(IcingSearchEngineSchemaTest,
.SetKey("namespace1", "uri1")
.SetSchema("Person")
.AddStringProperty("name", "Bill Lundbergh")
+ .AddInt64Property("age", 20)
.Build())
+ .AddInt64Property("timestamp", 1234)
.Build();
- // "sender.name" should get assigned property id 0 and subject should get
- // property id 1.
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
// document should match a query for 'Bill' in 'sender.name', but not in
// 'subject'
- SearchSpecProto search_spec;
- search_spec.set_query("sender.name:Bill");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("sender.name:Bill");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- search_spec.set_query("subject:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document should match a query for 20 in 'sender.age', but not in
+ // 'timestamp'
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("sender.age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec2.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(empty_result));
// Now update the schema with index_nested_properties=false. This should
- // reassign property ids, lead to an index rebuild and ensure that nothing
- // match a query for "Bill".
+ // reassign section ids, lead to an index rebuild and ensure that nothing
+ // matches a query for "Bill" or 20.
+ // - "sender.age": int64 type, (nested) unindexed. No section id assigned.
+ // - "sender.name": string type, (nested) unindexed. No section id assigned.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp": int64 type, indexed. Section id = 1.
SchemaProto no_nested_schema =
SchemaBuilder()
.AddType(person_proto)
@@ -821,6 +1025,10 @@ TEST_F(IcingSearchEngineSchemaTest,
.SetName("subject")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
@@ -833,16 +1041,36 @@ TEST_F(IcingSearchEngineSchemaTest,
->Add("Email");
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+ // Verify term search
// document shouldn't match a query for 'Bill' in either 'sender.name' or
// 'subject'
- search_spec.set_query("sender.name:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ search_spec1.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(empty_result));
- search_spec.set_query("subject:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ // Verify numeric (integer) search
+ // document shouldn't match a query for 20 in either 'sender.age' or
+ // 'timestamp'
+ search_spec2.set_query("sender.age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec2.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(empty_result));
@@ -853,8 +1081,11 @@ TEST_F(IcingSearchEngineSchemaTest,
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- // 'body' should have a property id of 0 and 'subject' should have a property
- // id of 1.
+ // Create a schema with 4 properties:
+ // - "body": string type, indexed. Section id = 0.
+ // - "subject": string type, indexed. Section id = 1.
+ // - "timestamp1": int64 type, indexed. Section id = 2.
+ // - "timestamp2": int64 type, indexed. Section id = 3.
SchemaProto email_with_body_schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -868,6 +1099,14 @@ TEST_F(IcingSearchEngineSchemaTest,
.SetName("body")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp1")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp2")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
@@ -880,7 +1119,7 @@ TEST_F(IcingSearchEngineSchemaTest,
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
- // Create a document with only a subject property.
+ // Create a document with only the subject and timestamp2 properties.
DocumentProto document =
DocumentBuilder()
.SetKey("namespace1", "uri1")
@@ -888,36 +1127,64 @@ TEST_F(IcingSearchEngineSchemaTest,
.SetCreationTimestampMs(1000)
.AddStringProperty("subject",
"Did you get the memo about TPS reports?")
+ .AddInt64Property("timestamp2", 1234)
.Build();
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Verify term search
// We should be able to retrieve the document by searching for 'tps' in
// 'subject'.
- SearchSpecProto search_spec;
- search_spec.set_query("subject:tps");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("subject:tps");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
- // Now update the schema to remove the 'body' field. This is backwards
- // incompatible, but document should be preserved because it doesn't contain a
- // 'body' field. If the index is correctly rebuilt, then 'subject' will now
- // have a property id of 0. If not, then the hits in the index will still have
- // have a property id of 1 and therefore it won't be found.
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp2'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("timestamp2 == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the 'body' and 'timestamp1' fields. This is
+ // backwards incompatible, but the document should be preserved because it
+ // doesn't contain a 'body' or 'timestamp1' field.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp2": int64 type, indexed. Section id = 1.
+ //
+ // If the index is not correctly rebuilt, then the hits of 'subject' and
+ // 'timestamp2' in the index will still have the old section ids of 1 and 3, and
+ // therefore they won't be found.
SchemaProto email_no_body_schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp2")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
set_schema_result = icing.SetSchema(
@@ -931,12 +1198,27 @@ TEST_F(IcingSearchEngineSchemaTest,
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+ // Verify term search
// We should be able to retrieve the document by searching for 'tps' in
// 'subject'.
- search_spec.set_query("subject:tps");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ search_spec1.set_query("subject:tps");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp2'.
+ search_spec2.set_query("timestamp2 == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(
@@ -945,8 +1227,10 @@ TEST_F(
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- // 'body' should have a property id of 0 and 'subject' should have a property
- // id of 1.
+ // Create a schema with 3 properties:
+ // - "body": string type, indexed. Section id = 0.
+ // - "subject": string type, indexed. Section id = 1.
+ // - "timestamp": int64 type, indexed. Section id = 2.
SchemaProto email_with_body_schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -960,6 +1244,10 @@ TEST_F(
.SetName("body")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
@@ -972,7 +1260,7 @@ TEST_F(
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
- // Create a document with only a subject property.
+ // Create a document with only the subject and timestamp properties.
DocumentProto document =
DocumentBuilder()
.SetKey("namespace1", "uri1")
@@ -980,30 +1268,52 @@ TEST_F(
.SetCreationTimestampMs(1000)
.AddStringProperty("subject",
"Did you get the memo about TPS reports?")
+ .AddInt64Property("timestamp", 1234)
.Build();
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Verify term search
// We should be able to retrieve the document by searching for 'tps' in
// 'subject'.
- SearchSpecProto search_spec;
- search_spec.set_query("subject:tps");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("subject:tps");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("timestamp == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Now update the schema to remove the 'body' field. This is backwards
// incompatible, but document should be preserved because it doesn't contain a
- // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to'
- // will now have property ids of 0 and 1 respectively. If not, then the hits
- // in the index will still have have a property id of 1 and therefore it won't
- // be found.
+ // 'body' field.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp": int64 type, indexed. Section id = 1.
+ // - "to": string type, indexed. Section id = 2.
+ //
+ // If the index is not correctly rebuilt, then the hits of 'subject' and
+ // 'timestamp' in the index will still have the old section ids of 1 and 2, and
+ // therefore they won't be found.
SchemaProto email_no_body_schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1017,6 +1327,10 @@ TEST_F(
.SetName("to")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
@@ -1031,12 +1345,27 @@ TEST_F(
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+ // Verify term search
// We should be able to retrieve the document by searching for 'tps' in
// 'subject'.
- search_spec.set_query("subject:tps");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ search_spec1.set_query("subject:tps");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ search_spec2.set_query("timestamp == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineSchemaTest,
@@ -1401,18 +1730,13 @@ TEST_F(IcingSearchEngineSchemaTest,
}
TEST_F(IcingSearchEngineSchemaTest, SetSchemaCanDetectPreviousSchemaWasLost) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
+ SchemaTypeConfigProto message_schema_type_config =
+ CreateMessageSchemaTypeConfig();
+ message_schema_type_config.mutable_properties(0)->set_cardinality(
+ CARDINALITY_OPTIONAL);
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema;
+ *schema.add_types() = message_schema_type_config;
// Make an incompatible schema, a previously OPTIONAL field is REQUIRED
SchemaProto incompatible_schema = schema;
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index 9ebd060..3c32253 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/icing-search-engine.h"
-
#include <cstdint>
#include <limits>
#include <memory>
@@ -25,10 +23,9 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
#include "icing/join/join-processor.h"
-#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
@@ -49,14 +46,10 @@
#include "icing/proto/usage.pb.h"
#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/random-string.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/snippet-helpers.h"
@@ -67,21 +60,12 @@ namespace lib {
namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::_;
using ::testing::ElementsAre;
using ::testing::Eq;
-using ::testing::Ge;
using ::testing::Gt;
-using ::testing::HasSubstr;
using ::testing::IsEmpty;
-using ::testing::Le;
-using ::testing::Lt;
-using ::testing::Matcher;
using ::testing::Ne;
-using ::testing::Return;
using ::testing::SizeIs;
-using ::testing::StrEq;
-using ::testing::UnorderedElementsAre;
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
@@ -3888,6 +3872,189 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
EqualsSearchResultIgnoreStatsAndScores(expected_result3));
}
+TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ const int32_t person1_doc_score = 10;
+ const int32_t person2_doc_score = 25;
+ const int32_t person3_doc_score = 123;
+ const int32_t email1_doc_score = 10;
+ const int32_t email2_doc_score = 15;
+ const int32_t email3_doc_score = 40;
+
+ // person1 has children email1 and email2.
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person1_doc_score)
+ .Build();
+ // person2 has a single child, email3.
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person2_doc_score)
+ .Build();
+ // person3 has no children.
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person3_doc_score)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email1_doc_score)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email2_doc_score)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email3_doc_score)
+ .Build();
+
+ // Set children scoring expression and their expected value.
+ ScoringSpecProto child_scoring_spec = GetDefaultScoringSpec();
+ child_scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ child_scoring_spec.set_advanced_scoring_expression(
+ "this.documentScore() * 2 + 1");
+ const int32_t exp_email1_score = email1_doc_score * 2 + 1;
+ const int32_t exp_email2_score = email2_doc_score * 2 + 1;
+ const int32_t exp_email3_score = email3_doc_score * 2 + 1;
+
+ // Set parent scoring expression and their expected value.
+ ScoringSpecProto parent_scoring_spec = GetDefaultScoringSpec();
+ parent_scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ parent_scoring_spec.set_advanced_scoring_expression(
+ "this.documentScore() * sum(this.childrenScores())");
+ const int32_t exp_person1_score =
+ person1_doc_score * (exp_email1_score + exp_email2_score);
+ const int32_t exp_person2_score = person2_doc_score * exp_email3_score;
+ const int32_t exp_person3_score = person3_doc_score * 0;
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ *nested_spec->mutable_scoring_spec() = child_scoring_spec;
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ SearchResultProto results =
+ icing.Search(search_spec, parent_scoring_spec, result_spec);
+ uint64_t next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person2"));
+ // exp_person2_score = 2025
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person2_score));
+
+ results = icing.GetNextPage(next_page_token);
+ next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person1"));
+ // exp_person1_score = 520
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person1_score));
+
+ results = icing.GetNextPage(next_page_token);
+ next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person3"));
+ // exp_person3_score = 0
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person3_score));
+}
+
TEST_F(IcingSearchEngineSearchTest, NumericFilterAdvancedQuerySucceeds) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -3969,6 +4136,101 @@ TEST_F(IcingSearchEngineSearchTest, NumericFilterAdvancedQuerySucceeds) {
EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
}
+TEST_F(IcingSearchEngineSearchTest,
+ NumericFilterAdvancedQueryWithPersistenceSucceeds) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // Schema will be persisted to disk when icing goes out of scope.
+ }
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .AddInt64Property("cost", 2)
+ .Build();
+ {
+ // Ensure that icing initializes the schema and section_manager
+ // properly from the pre-existing file.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+ // The index and document store will be persisted to disk when icing goes
+ // out of scope.
+ }
+
+ {
+ // Ensure that the index is brought back up without problems and we
+ // can query for the content that we expect.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+
+ search_spec.set_query("price == 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("cost > 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("price <= 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ }
+}
+
TEST_F(IcingSearchEngineSearchTest, NumericFilterOldQueryFails) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
diff --git a/icing/icing-search-engine_suggest_test.cc b/icing/icing-search-engine_suggest_test.cc
index dbd0a11..6973ad0 100644
--- a/icing/icing-search-engine_suggest_test.cc
+++ b/icing/icing-search-engine_suggest_test.cc
@@ -1299,6 +1299,215 @@ TEST_F(IcingSearchEngineSuggestTest,
ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_MultipleTerms_And) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionBarFo;
+ suggestionBarFo.set_query("bar fo");
+
+ // Search "bar AND f" only document 1 should match the search.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarFo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_MultipleTerms_Or) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "cat foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionBarCatFo;
+ suggestionBarCatFo.set_query("bar OR cat fo");
+ SuggestionResponse::Suggestion suggestionBarCatFoo;
+ suggestionBarCatFoo.set_query("bar OR cat foo");
+
+ // Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat
+ // foo" could match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar OR cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarCatFo),
+ EqualsProto(suggestionBarCatFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyRestriction) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .AddStringProperty("emailAddress", "fo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Add property restriction, only search for subject.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("subject:f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse::Suggestion suggestionSubjectFool;
+ suggestionSubjectFool.set_query("subject:fool");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionSubjectFool)));
+
+ // Add property restriction, only search for nested sender.name
+ suggestion_spec.set_prefix("sender.name:f");
+ SuggestionResponse::Suggestion suggestionSenderNameFoo;
+ suggestionSenderNameFoo.set_query("sender.name:foo");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionSenderNameFoo)));
+
+ // Add property restriction, only search for a non-existent section.
+ suggestion_spec.set_prefix("none:f");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_AndOperatorPlusPropertyRestriction) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo") // "bar fo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar cat foo") // "bar cat fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool") // "fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Search for "bar AND subject:f"
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar subject:f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse::Suggestion suggestionBarSubjectFo;
+ suggestionBarSubjectFo.set_query("bar subject:fo");
+ SuggestionResponse::Suggestion suggestionBarSubjectFoo;
+ suggestionBarSubjectFoo.set_query("bar subject:foo");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarSubjectFo),
+ EqualsProto(suggestionBarSubjectFoo)));
+
+ // Search for "bar AND cat AND subject:f"
+ suggestion_spec.set_prefix("bar cat subject:f");
+ SuggestionResponse::Suggestion suggestionBarCatSubjectFoo;
+ suggestionBarCatSubjectFoo.set_query("bar cat subject:foo");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarCatSubjectFoo)));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 9f21c9d..86a0826 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -36,8 +36,8 @@ namespace lib {
libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>>
IndexProcessor::Create(const Normalizer* normalizer, Index* index,
- NumericIndex<int64_t>* integer_index,
- const Clock* clock) {
+ NumericIndex<int64_t>* integer_index, const Clock* clock,
+ bool recovery_mode) {
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(index);
ICING_RETURN_ERROR_IF_NULL(integer_index);
@@ -50,7 +50,7 @@ IndexProcessor::Create(const Normalizer* normalizer, Index* index,
std::make_unique<IntegerSectionIndexingHandler>(clock, integer_index));
return std::unique_ptr<IndexProcessor>(
- new IndexProcessor(std::move(handlers), clock));
+ new IndexProcessor(std::move(handlers), clock, recovery_mode));
}
libtextclassifier3::Status IndexProcessor::IndexDocument(
@@ -59,7 +59,7 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
// TODO(b/259744228): set overall index latency.
for (auto& section_indexing_handler : section_indexing_handlers_) {
ICING_RETURN_IF_ERROR(section_indexing_handler->Handle(
- tokenized_document, document_id, put_document_stats));
+ tokenized_document, document_id, recovery_mode_, put_document_stats));
}
return libtextclassifier3::Status::OK;
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 45954c4..3d6b19a 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -37,12 +37,20 @@ class IndexProcessor {
// of any input components, and all pointers must refer to valid objects that
// outlive the created IndexProcessor instance.
//
+ // - recovery_mode: a flag indicating whether this IndexProcessor is used to
+ // restore the index. Since several indices (term, integer) are being restored
+ // at the same time, we start with the minimum last added DocumentId of all
+ // indices and replay documents to re-index, so it is possible to encounter
+ // some previously indexed documents in recovery mode. Therefore, we should
+ // skip them without returning an error in recovery mode.
+ //
// Returns:
// An IndexProcessor on success
// FAILED_PRECONDITION if any of the pointers is null.
static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create(
const Normalizer* normalizer, Index* index,
- NumericIndex<int64_t>* integer_index_, const Clock* clock);
+ NumericIndex<int64_t>* integer_index_, const Clock* clock,
+ bool recovery_mode = false);
// Add tokenized document to the index, associated with document_id. If the
// number of tokens in the document exceeds max_tokens_per_document, then only
@@ -65,13 +73,15 @@ class IndexProcessor {
private:
explicit IndexProcessor(std::vector<std::unique_ptr<SectionIndexingHandler>>&&
section_indexing_handlers,
- const Clock* clock)
+ const Clock* clock, bool recovery_mode)
: section_indexing_handlers_(std::move(section_indexing_handlers)),
- clock_(*clock) {}
+ clock_(*clock),
+ recovery_mode_(recovery_mode) {}
std::vector<std::unique_ptr<SectionIndexingHandler>>
section_indexing_handlers_;
const Clock& clock_;
+ bool recovery_mode_;
};
} // namespace lib
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 626efa7..c22e8f0 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -34,7 +34,7 @@
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -164,8 +164,7 @@ class IndexProcessorTest : public Test {
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- integer_index_,
- DummyNumericIndex<int64_t>::Create(filesystem_, integer_index_dir_));
+ integer_index_, IntegerIndex::Create(filesystem_, integer_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -375,9 +374,11 @@ TEST_F(IndexProcessorTest, OneDoc) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("hello", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("hello", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
@@ -386,8 +387,9 @@ TEST_F(IndexProcessorTest, OneDoc) {
kDocumentId0, expectedMap)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("hello", 1U << kPrefixedSectionId,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator(
+ "hello", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
@@ -428,9 +430,11 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("world", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("world", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap1{
@@ -443,17 +447,19 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
EqualsDocHitInfoWithTermFrequency(kDocumentId0, expectedMap2)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator(
+ "world", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
{kPrefixedSectionId, 2}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId1, expectedMap)));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("coffee", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("coffee", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expectedMap = {{kExactSectionId, Hit::kMaxTermFrequency}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -483,9 +489,11 @@ TEST_F(IndexProcessorTest, DocWithNestedProperty) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("rocky", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("rocky", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kNestedSectionId})));
@@ -508,9 +516,11 @@ TEST_F(IndexProcessorTest, DocWithRepeatedProperty) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("italian", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("italian", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kRepeatedSectionId})));
@@ -595,23 +605,27 @@ TEST_F(IndexProcessorTest, TooLongTokens) {
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
// "good" should have been indexed normally.
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("good", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("good", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
// "night" should not have been.
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("night", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("night", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// "night" should have been truncated to "nigh".
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("nigh", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("nigh", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
@@ -649,7 +663,9 @@ TEST_F(IndexProcessorTest, NonPrefixedContentPrefixQuery) {
// Only document_id 1 should surface in a prefix query for "Rock"
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("rock", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("rock", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
@@ -684,9 +700,11 @@ TEST_F(IndexProcessorTest, TokenNormalization) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("case", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("case", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(kDocumentId1,
@@ -701,6 +719,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 123)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
TokenizedDocument tokenized_document,
@@ -710,13 +729,19 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- // Indexing a document with document_id < last_added_document_id should cause
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t index_element_size,
+ index_->GetElementsSize());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 integer_index_crc,
+ integer_index_->UpdateChecksums());
+
+ // Indexing a document with document_id <= last_added_document_id should cause
// a failure.
document =
DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "all lower case")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 456)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
tokenized_document,
@@ -724,12 +749,83 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
document));
EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
// As should indexing a document document_id == last_added_document_id.
- EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
+}
+
+TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IndexProcessor> index_processor,
+ IndexProcessor::Create(normalizer_.get(), index_.get(),
+ integer_index_.get(), &fake_clock_,
+ /*recovery_mode=*/true));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t index_element_size,
+ index_->GetElementsSize());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 integer_index_crc,
+ integer_index_->UpdateChecksums());
+  // Indexing a document with document_id <= last_added_document_id in recovery
+  // mode should not return an error, but IndexProcessor should still ignore it
+  // and the index data should remain unchanged.
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "all lower case")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
+
+  // As should indexing a document with document_id == last_added_document_id.
+ EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
+ // Verify that both index_ and integer_index_ are unchanged.
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
}
TEST_F(IndexProcessorTest, NonAsciiIndexing) {
@@ -754,9 +850,11 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("你好", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("你好", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kExactSectionId})));
@@ -910,7 +1008,8 @@ TEST_F(IndexProcessorTest, ExactVerbatimProperty) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("Hello, world!", kSectionIdMaskAll,
+ index_->GetIterator("Hello, world!", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
@@ -941,9 +1040,11 @@ TEST_F(IndexProcessorTest, PrefixVerbatimProperty) {
// We expect to match the document we indexed as "Hello, w" is a prefix
// of "Hello, world!"
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("Hello, w", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("Hello, w", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
@@ -973,7 +1074,9 @@ TEST_F(IndexProcessorTest, VerbatimPropertyDoesntMatchSubToken) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("world", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("world", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
// We should not have hits for term "world" as the index processor should
@@ -1003,9 +1106,11 @@ TEST_F(IndexProcessorTest, Rfc822PropertyExact) {
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
{kRfc822SectionId, 2}};
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("alexsav", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsav", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1014,15 +1119,17 @@ TEST_F(IndexProcessorTest, Rfc822PropertyExact) {
expected_map = {{kRfc822SectionId, 1}};
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("com", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("alexsav@google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("alexsav@google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
@@ -1048,9 +1155,11 @@ TEST_F(IndexProcessorTest, Rfc822PropertyExactShouldNotReturnPrefix) {
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
{kRfc822SectionId, 2}};
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("alexsa", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsa", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
}
@@ -1083,22 +1192,28 @@ TEST_F(IndexProcessorTest, Rfc822PropertyPrefix) {
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
{kRfc822SectionId, 1}};
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("alexsav@", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsav@", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("goog", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("goog", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("ale", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("ale", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
@@ -1126,7 +1241,9 @@ TEST_F(IndexProcessorTest, Rfc822PropertyNoMatch) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("abc.xyz", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("abc.xyz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
@@ -1151,9 +1268,11 @@ TEST_F(IndexProcessorTest, ExactUrlProperty) {
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("google", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("google", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
@@ -1161,25 +1280,28 @@ TEST_F(IndexProcessorTest, ExactUrlProperty) {
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("http", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("www.google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("www.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expected_map)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("http://www.google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("http://www.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1206,20 +1328,24 @@ TEST_F(IndexProcessorTest, ExactUrlPropertyDoesNotMatchPrefix) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("co", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("co", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("mail.go", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("mail.go", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("mail.google.com", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("mail.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
}
@@ -1245,7 +1371,9 @@ TEST_F(IndexProcessorTest, PrefixUrlProperty) {
// "goo" is a prefix of "google" and "google.com"
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("goo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("goo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
@@ -1254,8 +1382,10 @@ TEST_F(IndexProcessorTest, PrefixUrlProperty) {
kDocumentId0, expected_map)));
// "http" is a prefix of "http" and "http://www.google.com"
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("http", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlPrefixedSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1263,8 +1393,9 @@ TEST_F(IndexProcessorTest, PrefixUrlProperty) {
// "www.go" is a prefix of "www.google.com"
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("www.go", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("www.go", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
expected_map = {{kUrlPrefixedSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
@@ -1292,26 +1423,32 @@ TEST_F(IndexProcessorTest, PrefixUrlPropertyNoMatch) {
// no token starts with "gle", so we should have no hits
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("gle", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("gle", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
std::vector<DocHitInfoTermFrequencyPair> hits =
GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("w.goo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("w.goo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
// tokens have separators removed, so no hits here
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator(".com", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator(".com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("calendar/render", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ itr, index_->GetIterator("calendar/render", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
hits = GetHitsWithTermFrequency(std::move(itr));
EXPECT_THAT(hits, IsEmpty());
}
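
The two OutOfOrderDocumentIds tests above pin down the contract for stale document ids: in normal mode, IndexProcessor::IndexDocument rejects a document whose id is <= last_added_document_id with INVALID_ARGUMENT, while in recovery mode the same call returns OK but indexes nothing, leaving the term index and the integer index (element sizes and checksums) untouched. A minimal sketch of that contract, reusing the fixture members from the tests above (index_processor_, normalizer_, index_, integer_index_, fake_clock_ and tokenized_document are assumed from the test fixture, so this is not a standalone program):

  // Recovery-mode processor: stale document ids are skipped rather than rejected.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IndexProcessor> recovery_processor,
      IndexProcessor::Create(normalizer_.get(), index_.get(),
                             integer_index_.get(), &fake_clock_,
                             /*recovery_mode=*/true));
  // Normal mode: reusing an old document id is an error; both indices stay unchanged.
  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  // Recovery mode: the same call succeeds but is still a no-op for both indices.
  EXPECT_THAT(recovery_processor->IndexDocument(tokenized_document, kDocumentId0),
              IsOk());
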
diff --git a/icing/index/index.cc b/icing/index/index.cc
index a35c80d..5cfcd27 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -182,7 +182,8 @@ libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
+Index::GetIterator(const std::string& term, int term_start_index,
+ int unnormalized_term_length, SectionIdMask section_id_mask,
TermMatchType::Code term_match_type,
bool need_hit_term_frequency) {
std::unique_ptr<DocHitInfoIterator> lite_itr;
@@ -190,17 +191,19 @@ Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
switch (term_match_type) {
case TermMatchType::EXACT_ONLY:
lite_itr = std::make_unique<DocHitInfoIteratorTermLiteExact>(
- term_id_codec_.get(), lite_index_.get(), term, section_id_mask,
- need_hit_term_frequency);
+ term_id_codec_.get(), lite_index_.get(), term, term_start_index,
+ unnormalized_term_length, section_id_mask, need_hit_term_frequency);
main_itr = std::make_unique<DocHitInfoIteratorTermMainExact>(
- main_index_.get(), term, section_id_mask, need_hit_term_frequency);
+ main_index_.get(), term, term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency);
break;
case TermMatchType::PREFIX:
lite_itr = std::make_unique<DocHitInfoIteratorTermLitePrefix>(
- term_id_codec_.get(), lite_index_.get(), term, section_id_mask,
- need_hit_term_frequency);
+ term_id_codec_.get(), lite_index_.get(), term, term_start_index,
+ unnormalized_term_length, section_id_mask, need_hit_term_frequency);
main_itr = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
- main_index_.get(), term, section_id_mask, need_hit_term_frequency);
+ main_index_.get(), term, term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency);
break;
default:
return absl_ports::InvalidArgumentError(
diff --git a/icing/index/index.h b/icing/index/index.h
index 878ac59..3200d70 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -177,15 +177,18 @@ class Index {
IndexStorageInfoProto GetStorageInfo() const;
// Create an iterator to iterate through all doc hit infos in the index that
- // match the term. section_id_mask can be set to ignore hits from sections not
- // listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
- // that occur in section 3.
+  // match the term. term_start_index is the start index of the given term in
+  // the search query. unnormalized_term_length is the length of the given
+  // unnormalized term in the search query. section_id_mask can be set to
+  // ignore hits from sections not listed in the mask. Eg. section_id_mask =
+  // 1U << 3; would only return hits that occur in section 3.
//
// Returns:
// unique ptr to a valid DocHitInfoIterator that matches the term
// INVALID_ARGUMENT if given an invalid term_match_type
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- const std::string& term, SectionIdMask section_id_mask,
+ const std::string& term, int term_start_index,
+ int unnormalized_term_length, SectionIdMask section_id_mask,
TermMatchType::Code term_match_type, bool need_hit_term_frequency = true);
// Finds terms with the given prefix in the given namespaces. If
@@ -262,6 +265,7 @@ class Index {
ICING_RETURN_IF_ERROR(main_index_->AddHits(
*term_id_codec_, std::move(outputs.backfill_map),
std::move(term_id_hit_pairs), lite_index_->last_added_document_id()));
+ ICING_RETURN_IF_ERROR(main_index_->PersistToDisk());
return lite_index_->Reset();
}
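
In the new Index::GetIterator signature above, term_start_index and unnormalized_term_length describe where the term came from in the search query: its start index and the length of the term before normalization. The tests in this change pass 0 for both because they do not exercise term positions. An illustrative call with non-trivial values (the query string and offsets below are made-up examples, not taken from this patch):

  // Hypothetical raw query "hello wor": the prefix term "wor" starts at
  // index 6 and is 3 characters long before normalization.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> itr,
      index_->GetIterator("wor", /*term_start_index=*/6,
                          /*unnormalized_term_length=*/3, kSectionIdMaskAll,
                          TermMatchType::PREFIX));

Separately, the merge path above (the hunk ending in lite_index_->Reset()) now calls main_index_->PersistToDisk() right after the backfilled hits are added, so merged main-index data is flushed to disk before the lite index is reset.
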
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index c7b6380..d563bcb 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -14,6 +14,8 @@
#include "icing/index/index.h"
+#include <unistd.h>
+
#include <algorithm>
#include <cstdint>
#include <limits>
@@ -93,10 +95,12 @@ class IndexTest : public Test {
}
libtextclassifier3::StatusOr<std::vector<DocHitInfo>> GetHits(
- std::string term, TermMatchType::Code match_type) {
+ std::string term, int term_start_index, int unnormalized_term_length,
+ TermMatchType::Code match_type) {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(term, kSectionIdMaskAll, match_type));
+ index_->GetIterator(term, term_start_index, unnormalized_term_length,
+ kSectionIdMaskAll, match_type));
return GetHits(std::move(itr));
}
@@ -154,13 +158,16 @@ TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
TEST_F(IndexTest, EmptyIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -171,13 +178,16 @@ TEST_F(IndexTest, EmptyIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -190,15 +200,18 @@ TEST_F(IndexTest, AdvancePastEnd) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(itr->doc_hit_info(),
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -216,15 +229,18 @@ TEST_F(IndexTest, AdvancePastEndAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(itr->doc_hit_info(),
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -240,7 +256,9 @@ TEST_F(IndexTest, SingleHitSingleTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -256,7 +274,9 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -271,26 +291,32 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId2, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId2, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Mapping to a different docid will translate the hit
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
/*new_last_added_document_id=*/1));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Mapping to kInvalidDocumentId will remove the hit.
ICING_ASSERT_OK(
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
/*new_last_added_document_id=*/0));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
}
@@ -305,26 +331,32 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId2, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId2, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Mapping to a different docid will translate the hit
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
/*new_last_added_document_id=*/1));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Mapping to kInvalidDocumentId will remove the hit.
ICING_ASSERT_OK(
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
/*new_last_added_document_id=*/0));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), 0);
}
@@ -337,7 +369,9 @@ TEST_F(IndexTest, SingleHitMultiTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -354,7 +388,9 @@ TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -380,14 +416,17 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Delete document id 1, and document id 2 is translated to 1.
@@ -395,23 +434,30 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
/*new_last_added_document_id=*/1));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Delete all the rest documents.
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
/*new_last_added_document_id=*/kInvalidDocumentId));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
}
@@ -437,14 +483,17 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
/*new_last_added_document_id=*/2));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
// Delete document id 1, and document id 2 is translated to 1.
@@ -452,23 +501,30 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
/*new_last_added_document_id=*/1));
EXPECT_THAT(
- GetHits("foo", TermMatchType::EXACT_ONLY),
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
IsOkAndHolds(ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0,
std::vector<SectionId>{kSectionId2}))));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
// Delete all the rest documents.
ICING_ASSERT_OK(index_->Optimize(
/*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
/*new_last_added_document_id=*/kInvalidDocumentId));
- EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
}
@@ -481,7 +537,9 @@ TEST_F(IndexTest, NoHitMultiTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("baz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -497,7 +555,9 @@ TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("baz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
@@ -520,7 +580,9 @@ TEST_F(IndexTest, MultiHitMultiTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -548,7 +610,9 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -570,7 +634,9 @@ TEST_F(IndexTest, MultiHitSectionRestrict) {
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, desired_section,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -592,7 +658,9 @@ TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, desired_section,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -613,7 +681,9 @@ TEST_F(IndexTest, SingleHitDedupeIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -627,7 +697,9 @@ TEST_F(IndexTest, PrefixHit) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -643,7 +715,9 @@ TEST_F(IndexTest, PrefixHitAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -662,7 +736,9 @@ TEST_F(IndexTest, MultiPrefixHit) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -685,7 +761,9 @@ TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -706,7 +784,9 @@ TEST_F(IndexTest, NoExactHitInPrefixQuery) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
@@ -727,7 +807,9 @@ TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
@@ -742,7 +824,9 @@ TEST_F(IndexTest, PrefixHitDedupe) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -759,7 +843,9 @@ TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -769,21 +855,27 @@ TEST_F(IndexTest, PrefixToString) {
SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", id_mask, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, id_mask,
+ TermMatchType::PREFIX));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000001100:foo* OR "
"00000000000000000000000000000000000000000000"
"00000000000000001100:foo*)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(itr->ToString(), Eq("(1111111111111111111111111111111111111111111"
"111111111111111111111:foo* OR "
"11111111111111111111111111111111111111111111"
"11111111111111111111:foo*)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskNone,
- TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskNone, TermMatchType::PREFIX));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000000000:foo* OR "
"00000000000000000000000000000000000000000000"
@@ -794,23 +886,27 @@ TEST_F(IndexTest, ExactToString) {
SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", id_mask, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, id_mask,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000001100:foo OR "
"00000000000000000000000000000000000000000000"
"00000000000000001100:foo)"));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->ToString(), Eq("(1111111111111111111111111111111111111111111"
"111111111111111111111:foo OR "
"11111111111111111111111111111111111111111111"
"11111111111111111111:foo)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("foo", kSectionIdMaskNone,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskNone, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
"000000000000000000000:foo OR "
"00000000000000000000000000000000000000000000"
@@ -826,14 +922,17 @@ TEST_F(IndexTest, NonAsciiTerms) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("こんに", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("あなた", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("あなた", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -850,14 +949,17 @@ TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("こんに", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("あなた", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("あなた", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -915,7 +1017,8 @@ TEST_F(IndexTest, FullIndex) {
for (int i = 0; i < query_terms.size(); i += 25) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(query_terms.at(i).c_str(), kSectionIdMaskAll,
+ index_->GetIterator(query_terms.at(i).c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::PREFIX));
// Each query term should contain at least one hit - there may have been
// other hits for this term that were added.
@@ -923,7 +1026,8 @@ TEST_F(IndexTest, FullIndex) {
}
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> last_itr,
- index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::PREFIX));
EXPECT_THAT(last_itr->Advance(), IsOk());
EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
@@ -981,7 +1085,8 @@ TEST_F(IndexTest, FullIndexMerge) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> last_itr,
- index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::PREFIX));
EXPECT_THAT(last_itr->Advance(), IsOk());
EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
@@ -998,14 +1103,16 @@ TEST_F(IndexTest, FullIndexMerge) {
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(prefix + "bar", kSectionIdMaskAll,
+ index_->GetIterator(prefix + "bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
// We know that "bar" should have at least one hit because we just added it!
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
ICING_ASSERT_OK_AND_ASSIGN(
- last_itr, index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
- TermMatchType::PREFIX));
+ last_itr, index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(last_itr->Advance(), IsOk());
EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1));
}
@@ -1019,11 +1126,15 @@ TEST_F(IndexTest, OptimizeShouldWorkForEmptyIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("", kSectionIdMaskAll, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("", kSectionIdMaskAll, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
@@ -1064,14 +1175,18 @@ TEST_F(IndexTest, IndexShouldWorkAtSectionLimit) {
std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits,
- GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
for (int i = 0; i < 4096; ++i) {
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(query_terms[i], TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
i, std::vector<SectionId>{(SectionId)(i % 64)})));
}
@@ -1124,8 +1239,10 @@ TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits,
- GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
@@ -1133,7 +1250,9 @@ TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
if (i % 64 == 2) {
// Only section 2 is an exact section
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(std::to_string(i), TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(std::to_string(i), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
i, std::vector<SectionId>{(SectionId)(2)})));
}
@@ -1199,13 +1318,17 @@ TEST_F(IndexTest, IndexOptimize) {
index_->Optimize(document_id_old_to_new, new_last_added_document_id));
EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits,
- GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
for (int i = 0; i < 2048; ++i) {
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(query_terms[i], TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
if (document_id_old_to_new[i] == kInvalidDocumentId) {
EXPECT_THAT(hits, IsEmpty());
} else {
@@ -1219,12 +1342,16 @@ TEST_F(IndexTest, IndexOptimize) {
ICING_ASSERT_OK(index_->Merge());
EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
// Check prefix search.
- ICING_ASSERT_OK_AND_ASSIGN(hits, GetHits(prefix, TermMatchType::PREFIX));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits, GetHits(prefix, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::PREFIX));
EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
// Check exact search.
for (int i = 0; i < 2048; ++i) {
ICING_ASSERT_OK_AND_ASSIGN(
- hits, GetHits(query_terms[i], TermMatchType::EXACT_ONLY));
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
if (document_id_old_to_new[i] == kInvalidDocumentId) {
EXPECT_THAT(hits, IsEmpty());
} else {
@@ -1297,7 +1424,9 @@ TEST_F(IndexTest, IndexPersistence) {
// Check that the hits are present.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -1324,7 +1453,9 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) {
// Check that the hits are present.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -1920,7 +2051,9 @@ TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -1951,7 +2084,9 @@ TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2066,7 +2201,9 @@ TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2120,7 +2257,9 @@ TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2133,7 +2272,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// Add one document to the lite index
@@ -2144,7 +2285,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
// Clipping to invalid should have no effect.
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2153,7 +2296,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
ICING_ASSERT_OK(index_->Merge());
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2167,7 +2312,9 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
// hits.
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2180,7 +2327,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// Add one document to the lite index
@@ -2192,7 +2341,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
// Clipping to invalid should have no effect.
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2201,7 +2352,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
ICING_ASSERT_OK(index_->Merge());
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2216,7 +2369,9 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
// hits.
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
@@ -2246,7 +2401,9 @@ TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
// Clipping to document 0 should toss out the lite index, but keep the main.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -2279,7 +2436,9 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
// Clipping to document 0 should toss out both indices.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
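
The index_test.cc hunks above are mechanical: every GetIterator() call gains a term_start_index and an unnormalized_term_length argument, and the tests simply pass 0 for both because they never exercise suggestions. As a rough, standalone illustration of what those two values describe for a real query (this is not Icing code; the helper name LastTermPosition is made up):

#include <cstddef>
#include <string>
#include <utility>

// Returns {term_start_index, unnormalized_term_length} for the last
// whitespace-separated token of `query`, or {0, 0} for an empty query.
std::pair<int, int> LastTermPosition(const std::string& query) {
  std::size_t end = query.find_last_not_of(' ');
  if (end == std::string::npos) return {0, 0};
  std::size_t begin = query.find_last_of(' ', end);
  begin = (begin == std::string::npos) ? 0 : begin + 1;
  return {static_cast<int>(begin), static_cast<int>(end - begin + 1)};
}

// For the query "foo ba", this yields {4, 2}: the unfinished term "ba"
// starts at byte 4 and is two bytes long before normalization.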
diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc
index a49b9f3..0ed01d3 100644
--- a/icing/index/integer-section-indexing-handler.cc
+++ b/icing/index/integer-section-indexing-handler.cc
@@ -26,10 +26,21 @@ namespace lib {
libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) {
- // TODO(b/259744228):
- // 1. Resolve last_added_document_id for index rebuilding before rollout
- // 2. Set integer indexing latency and other stats
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ // TODO(b/259744228): set integer indexing latency and other stats
+
+ if (integer_index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= integer_index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // In recovery mode, skip the document if document_id <=
+ // last_added_document_id, without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, integer_index_.last_added_document_id()));
+ }
+ integer_index_.set_last_added_document_id(document_id);
libtextclassifier3::Status status;
// We have to add integer sections into integer index in reverse order because
@@ -55,7 +66,7 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
}
// Add all the seen keys to the integer index.
- status = editor->IndexAllBufferedKeys();
+ status = std::move(*editor).IndexAllBufferedKeys();
if (!status.ok()) {
ICING_LOG(WARNING) << "Failed to add keys into integer index due to: "
<< status.error_message();
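
The new guard in Handle() above makes the integer handler idempotent during index rebuild: a document that was already indexed is silently skipped in recovery mode, while an out-of-order document id is rejected otherwise. A minimal sketch of that decision, with illustrative names only (HandleAction and DecideAction are not part of Icing):

#include <cstdint>

using DocumentId = int32_t;
inline constexpr DocumentId kInvalidDocumentId = -1;

enum class HandleAction { kIndex, kSkip, kError };

HandleAction DecideAction(DocumentId last_added, DocumentId incoming,
                          bool recovery_mode) {
  if (last_added != kInvalidDocumentId && incoming <= last_added) {
    // Replaying an already-indexed document during recovery is expected.
    return recovery_mode ? HandleAction::kSkip : HandleAction::kError;
  }
  return HandleAction::kIndex;  // Also advances last_added_document_id.
}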
diff --git a/icing/index/integer-section-indexing-handler.h b/icing/index/integer-section-indexing-handler.h
index dd0e46c..d75815c 100644
--- a/icing/index/integer-section-indexing-handler.h
+++ b/icing/index/integer-section-indexing-handler.h
@@ -33,17 +33,17 @@ class IntegerSectionIndexingHandler : public SectionIndexingHandler {
~IntegerSectionIndexingHandler() override = default;
- // TODO(b/259744228): update this documentation after resolving
- // last_added_document_id problem.
// Handles the integer indexing process: add hits into the integer index for
// all contents in tokenized_document.integer_sections.
//
- /// Returns:
+ // Returns:
// - OK on success
+ // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+ // document_id of a previously indexed document in non-recovery mode.
// - Any NumericIndex<int64_t>::Editor errors.
libtextclassifier3::Status Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) override;
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
NumericIndex<int64_t>& integer_index_;
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
index e75ed87..67c7d25 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
@@ -40,5 +40,12 @@ libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAllDocumentId::TrimRightMostNode() && {
+ // The all document id node should be trimmed.
+ TrimmedNode node = {nullptr, /*term=*/"", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0};
+ return node;
+}
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
index 0fa74f5..bb16eaf 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
@@ -35,6 +35,8 @@ class DocHitInfoIteratorAllDocumentId : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override { return 0; }
int32_t GetNumLeafAdvanceCalls() const override {
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
index 7366b97..ea2dda6 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
@@ -32,6 +32,7 @@ namespace {
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::IsNull;
using ::testing::Not;
TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
@@ -108,6 +109,16 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) {
}
}
+TEST(DocHitInfoIteratorAllDocumentIdTest, TrimAllDocumentIdIterator) {
+ DocHitInfoIteratorAllDocumentId all_it(100);
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(all_it).TrimRightMostNode());
+ // The whole iterator is trimmed
+ EXPECT_THAT(trimmed_node.term_, testing::IsEmpty());
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(0));
+ EXPECT_THAT(trimmed_node.iterator_, IsNull());
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index 3b7ede9..185a35e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -111,6 +111,19 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAnd::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
+ std::move(*long_).TrimRightMostNode());
+ if (trimmed_long.iterator_ == nullptr) {
+ trimmed_long.iterator_ = std::move(short_);
+ } else {
+ trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
+ std::move(short_), std::move(trimmed_long.iterator_));
+ }
+ return trimmed_long;
+}
+
int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
}
@@ -195,6 +208,27 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAndNary::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(
+ TrimmedNode trimmed_right,
+ std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
+ if (trimmed_right.iterator_ == nullptr) {
+ if (iterators_.size() > 2) {
+ iterators_.pop_back();
+ trimmed_right.iterator_ =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
+ } else if (iterators_.size() == 2) {
+ trimmed_right.iterator_ = std::move(iterators_.at(0));
+ }
+ } else {
+ iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
+ trimmed_right.iterator_ =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
+ }
+ return trimmed_right;
+}
+
int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
int32_t blockCount = 0;
for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
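
The two TrimRightMostNode() overrides above follow the same rule: recurse into the right-most child (long_ for the binary AND, the last element of iterators_ for the n-ary AND); if that child trims itself away and returns a null iterator, hand back the remaining children, otherwise re-wrap the trimmed child under a new AND. A rough standalone model of the binary case, assuming a toy Node type rather than the real DocHitInfoIterator classes:

#include <memory>
#include <string>
#include <utility>

struct Node {
  std::string term;                   // non-empty only for leaf nodes
  std::unique_ptr<Node> left, right;  // set only for AND nodes
};

// Returns {remaining tree (may be null), term of the trimmed leaf}.
std::pair<std::unique_ptr<Node>, std::string> TrimRightMost(
    std::unique_ptr<Node> node) {
  if (!node->right) {
    // Leaf: trim itself; nothing remains.
    return {nullptr, std::move(node->term)};
  }
  auto [remaining_right, term] = TrimRightMost(std::move(node->right));
  if (!remaining_right) {
    // The right child was a leaf, so only the left subtree survives.
    return {std::move(node->left), std::move(term)};
  }
  node->right = std::move(remaining_right);
  return {std::move(node), std::move(term)};
}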
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index 8ceff44..0f40f94 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -40,6 +40,8 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator {
std::unique_ptr<DocHitInfoIterator> long_it);
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
@@ -74,6 +76,8 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
index 9b9f44b..51828cb 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
@@ -171,6 +171,123 @@ TEST(DocHitInfoIteratorAndTest, AdvanceNestedIterators) {
EXPECT_THAT(GetDocumentIds(outer_iter.get()), ElementsAre(10, 6, 2));
}
+TEST(DocHitInfoIteratorAndTest, TrimAndIterator) {
+ std::vector<DocHitInfo> left_vector = {DocHitInfo(3), DocHitInfo(2)};
+ std::vector<DocHitInfo> right_vector = {DocHitInfo(1), DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(left_vector);
+ std::unique_ptr<DocHitInfoIterator> right_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(right_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iter),
+ std::move(right_iter));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(3, 2));
+}
+
+TEST(DocHitInfoIteratorAndTest, TrimAndIterator_TwoLayer) {
+ // Build an iterator tree like:
+ //
+ // AND
+ // / \
+ // first AND
+ // | / \
+ // {0, 1} second third
+ // | |
+ // {1} {0}
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> nested_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(second_iter),
+ std::move(third_iter));
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(first_iter),
+ std::move(nested_iter));
+
+ // The third_iter is trimmed.
+ // AND
+ // / \
+ // first second
+ // | |
+ // {0, 1} {1}
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(1));
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, TrimAndNaryIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(2), DocHitInfo(1),
+ DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(2), DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(2)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "term", 10));
+
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+
+ // The third iterator is trimmed
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(2, 1));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, TrimAndNaryIterator_TwoLayer) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(3), DocHitInfo(2),
+ DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(2), DocHitInfo(1),
+ DocHitInfo(0)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> forth_vector = {DocHitInfo(0)};
+
+ // Build nested iterator
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+ std::unique_ptr<DocHitInfoIterator> forth_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(forth_vector, "term", 10);
+ std::unique_ptr<DocHitInfoIterator> nested_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(third_iter),
+ std::move(forth_iter));
+
+ // Build outer iterator
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::move(nested_iter));
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+
+ // The fourth iterator is trimmed.
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(1, 0));
+}
+
TEST(DocHitInfoIteratorAndTest, SectionIdMask) {
// Arbitrary section ids for the documents in the DocHitInfoIterators.
// Created to test correct section_id_mask behavior.
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index 2e8ba23..83a73a4 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -105,6 +105,18 @@ libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorFilter::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ if (trimmed_delegate.iterator_ != nullptr) {
+ trimmed_delegate.iterator_ = std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(trimmed_delegate.iterator_), &document_store_, &schema_store_,
+ options_);
+ }
+ return trimmed_delegate;
+}
+
int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
return delegate_->GetNumBlocksInspected();
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index 5051607..ab13ae5 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -60,6 +60,8 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index 967e518..ddb216a 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -25,6 +25,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
@@ -878,6 +879,55 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) {
EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
}
+TEST_F(DocHitInfoIteratorFilterTest, TrimFilterIterator) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(document1_namespace1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(document2_namespace1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(document3_namespace2_schema1_));
+
+ // Build an iterator tree like:
+ // Filter
+ // |
+ // AND
+ // / \
+ // {1, 3} {2}
+ std::vector<DocHitInfo> left_vector = {DocHitInfo(document_id1),
+ DocHitInfo(document_id3)};
+ std::vector<DocHitInfo> right_vector = {DocHitInfo(document_id2)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(left_vector);
+ std::unique_ptr<DocHitInfoIterator> right_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(right_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iter),
+ std::move(right_iter));
+
+ DocHitInfoIteratorFilter::Options options;
+ // Filters out document3 by namespace
+ options.namespaces = std::vector<std::string_view>{namespace1_};
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
+
+ // The trimmed tree.
+ // Filter
+ // |
+ // {1, 3}
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(filtered_iterator).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()),
+ ElementsAre(document_id1));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-none.h b/icing/index/iterator/doc-hit-info-iterator-none.h
new file mode 100644
index 0000000..f938d32
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-none.h
@@ -0,0 +1,52 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+
+namespace icing {
+namespace lib {
+
+// Iterator that will return no results.
+class DocHitInfoIteratorNone : public DocHitInfoIterator {
+ public:
+ libtextclassifier3::Status Advance() override {
+ return absl_ports::ResourceExhaustedError(
+ "DocHitInfoIterator NONE has no hits.");
+ }
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ TrimmedNode node = {nullptr, /*term=*/"", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0};
+ return node;
+ }
+
+ int32_t GetNumBlocksInspected() const override { return 0; }
+
+ int32_t GetNumLeafAdvanceCalls() const override { return 0; }
+
+ std::string ToString() const override { return "(NONE)"; }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index 8fb3659..1818f08 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -60,6 +60,13 @@ libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorNot::TrimRightMostNode() && {
+ // Don't generate suggestions if the last operator is NOT.
+ return absl_ports::UnimplementedError(
+ "Cannot trim right most node in NOT operator.");
+}
+
int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
return to_be_excluded_->GetNumBlocksInspected() +
all_document_id_iterator_.GetNumBlocksInspected();
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.h b/icing/index/iterator/doc-hit-info-iterator-not.h
index 58e909d..8cc3bf3 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.h
+++ b/icing/index/iterator/doc-hit-info-iterator-not.h
@@ -50,6 +50,11 @@ class DocHitInfoIteratorNot : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ // The NOT operator is not supposed to be trimmed.
+ // We shouldn't generate suggestions for the last term if it belongs to a
+ // NOT operator.
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
index 5d0e4ac..54d6c36 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
@@ -155,6 +155,17 @@ TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) {
DocHitInfo(0, kSectionIdMaskNone)));
}
+TEST(DocHitInfoIteratorNotTest, TrimNotIterator) {
+ std::vector<DocHitInfo> exclude_doc_hit_infos = {DocHitInfo(0)};
+ std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+ DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+ /*document_id_limit=*/5);
+ EXPECT_THAT(std::move(not_iterator).TrimRightMostNode(),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
index 655cafc..8f7b84f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -21,6 +21,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/store/document-id.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -57,6 +58,26 @@ DocHitInfoIteratorOr::DocHitInfoIteratorOr(
std::unique_ptr<DocHitInfoIterator> right_it)
: left_(std::move(left_it)), right_(std::move(right_it)) {}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorOr::TrimRightMostNode() && {
+ // Trim the whole OR iterator. Only the prefix information of the right
+ // iterator is kept.
+ //
+ // The OR operator has higher priority, so it is not possible for the
+ // right-most child of a nested iterator to hold an unfinished prefix that
+ // we need to generate suggestions for.
+ //
+ // E.g., `foo OR (bar baz)` is not valid for search suggestion since there is
+ // no unfinished last term to complete.
+ //
+ // If we need to trim an OR iterator for search suggestion, the right child
+ // must itself be the last term, and we don't need the left side's
+ // information to generate suggestions for it.
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
+ std::move(*right_).TrimRightMostNode());
+ trimmed_right.iterator_ = nullptr;
+ return trimmed_right;
+}
+
libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
// Cache the document_id of the left iterator for comparison to the right.
DocumentId orig_left_document_id = left_document_id_;
@@ -140,6 +161,26 @@ DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
: iterators_(std::move(iterators)) {}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorOrNary::TrimRightMostNode() && {
+ // Trim the whole OR iterator.
+ //
+ // The OR operator has higher priority, so it is not possible for the
+ // right-most child of a nested iterator to hold an unfinished prefix that
+ // we need to generate suggestions for.
+ //
+ // E.g., `foo OR (bar baz)` is not valid for search suggestion since there is
+ // no unfinished last term to complete.
+ //
+ // If we need to trim an OR iterator for search suggestion, the right-most
+ // child must itself be the last term, and we don't need the left side's
+ // information to generate suggestions for it.
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
+ std::move(*iterators_.back()).TrimRightMostNode());
+ trimmed_right.iterator_ = nullptr;
+ return trimmed_right;
+}
+
libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
current_iterators_.clear();
if (iterators_.size() < 2) {
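
To make the OR behaviour concrete with the same toy Node model sketched after the AND diff: the left subtree is dropped entirely rather than re-attached, because the left branch of an OR does not constrain which documents the suggestion may come from. A sketch that reuses the hypothetical Node and TrimRightMost() from that earlier model:

std::pair<std::unique_ptr<Node>, std::string> TrimRightMostOr(
    std::unique_ptr<Node> node) {
  // Trim the right child but keep only its term; for "foo OR b" we suggest
  // completions of "b" over all documents, not only those matching "foo".
  std::string term = TrimRightMost(std::move(node->right)).second;
  return {nullptr, std::move(term)};
}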
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 2dae68d..1e9847d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -34,6 +34,8 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator {
explicit DocHitInfoIteratorOr(std::unique_ptr<DocHitInfoIterator> left_it,
std::unique_ptr<DocHitInfoIterator> right_it);
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
libtextclassifier3::Status Advance() override;
int32_t GetNumBlocksInspected() const override;
@@ -77,6 +79,8 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator {
explicit DocHitInfoIteratorOrNary(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
libtextclassifier3::Status Advance() override;
int32_t GetNumBlocksInspected() const override;
diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
index f487801..1950c01 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
@@ -312,6 +312,47 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
}
}
+TEST(DocHitInfoIteratorOrTest, TrimOrIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "term", 10);
+
+ DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(or_iter).TrimRightMostNode());
+ // The whole iterator is trimmed
+ ASSERT_TRUE(trimmed_node.iterator_ == nullptr);
+ ASSERT_THAT(trimmed_node.term_, Eq("term"));
+ ASSERT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, TrimOrNaryIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(2)};
+ std::vector<DocHitInfo> forth_vector = {DocHitInfo(3)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(third_vector));
+ iterators.push_back(
+ std::make_unique<DocHitInfoIteratorDummy>(forth_vector, "term", 10));
+ DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(or_iter).TrimRightMostNode());
+ // The whole iterator is trimmed
+ ASSERT_TRUE(trimmed_node.iterator_ == nullptr);
+ ASSERT_THAT(trimmed_node.term_, Eq("term"));
+ ASSERT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 0871436..d60e68e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -38,13 +38,15 @@ namespace lib {
DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::string target_section)
+ std::set<std::string> target_sections)
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- target_section_(std::move(target_section)) {}
+ target_sections_(std::move(target_sections)) {}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
while (delegate_->Advance().ok()) {
DocumentId document_id = delegate_->doc_hit_info().document_id();
@@ -61,8 +63,8 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
- // A hit can be in multiple sections at once, need to check that at least
- // one of the confirmed section ids match the name of the target section
+ // A hit can be in multiple sections at once, need to check which of the
+ // section ids match the target sections
while (section_id_mask != 0) {
// There was a hit in this section id
SectionId section_id = __builtin_ctzll(section_id_mask);
@@ -74,11 +76,10 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
const SectionMetadata* section_metadata =
section_metadata_or.ValueOrDie();
- if (section_metadata->path == target_section_) {
+ if (target_sections_.find(section_metadata->path) !=
+ target_sections_.end()) {
// The hit was in the target section name, return OK/found
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ = UINT64_C(1) << section_id;
- return libtextclassifier3::Status::OK;
+ hit_intersect_section_ids_mask_ |= UINT64_C(1) << section_id;
}
}
@@ -86,15 +87,35 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
section_id_mask &= ~(UINT64_C(1) << section_id);
}
+ if (hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
+ doc_hit_info_ = delegate_->doc_hit_info();
+ doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+ return libtextclassifier3::Status::OK;
+ }
// Didn't find a matching section name for this hit. Continue.
}
// Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ if (trimmed_delegate.iterator_ == nullptr) {
+ // TODO(b/228240987): Update TrimmedNode and downstream code to handle
+ // multiple section restricts.
+ trimmed_delegate.target_section_ = std::move(*target_sections_.begin());
+ return trimmed_delegate;
+ }
+ trimmed_delegate.iterator_ =
+ std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(trimmed_delegate.iterator_), &document_store_,
+ &schema_store_, std::move(target_sections_));
+ return std::move(trimmed_delegate);
+}
+
int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
return delegate_->GetNumBlocksInspected();
}
@@ -104,7 +125,8 @@ int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
}
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
- return absl_ports::StrCat(target_section_, ": ", delegate_->ToString());
+ return absl_ports::StrCat("(", absl_ports::StrJoin(target_sections_, ","),
+ "): ", delegate_->ToString());
}
} // namespace lib
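
The reworked Advance() above no longer returns on the first matching section; it ORs every section id whose path is in target_sections_ into hit_intersect_section_ids_mask_ and only then reports the hit. A simplified standalone model of that mask computation (not the real iterator; the section_paths vector stands in for the SchemaStore metadata lookup):

#include <cstdint>
#include <set>
#include <string>
#include <vector>

using SectionIdMask = uint64_t;

// Keeps only the hit sections whose path is in the target set; a non-zero
// result means the document survives the section restriction.
SectionIdMask RestrictMask(SectionIdMask hit_mask,
                           const std::set<std::string>& target_sections,
                           const std::vector<std::string>& section_paths) {
  SectionIdMask kept = 0;
  while (hit_mask != 0) {
    int section_id = __builtin_ctzll(hit_mask);
    if (target_sections.count(section_paths[section_id]) > 0) {
      kept |= UINT64_C(1) << section_id;
    }
    hit_mask &= ~(UINT64_C(1) << section_id);
  }
  return kept;
}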
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index 2639e67..92971a6 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -42,10 +42,12 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
explicit DocHitInfoIteratorSectionRestrict(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::string target_section);
+ std::set<std::string> target_sections);
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override;
int32_t GetNumLeafAdvanceCalls() const override;
@@ -74,8 +76,7 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
- // Ensure that this does not outlive the underlying string value.
- std::string target_section_;
+ std::set<std::string> target_sections_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 6d41e90..5dd69c1 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -24,6 +24,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
@@ -47,6 +48,9 @@ using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+constexpr SectionId kIndexedSectionId0 = 0;
+constexpr SectionId kIndexedSectionId1 = 1;
+
class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
protected:
DocHitInfoIteratorSectionRestrictTest()
@@ -54,24 +58,39 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document_ =
- DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build();
-
- schema_ = SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("email")
- // Add an indexed property so we generate section
- // metadata on it
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(indexed_property_)
- .SetDataTypeString(TERM_MATCH_EXACT,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- // First and only indexed property, so it gets the first id of 0
- indexed_section_id_ = 0;
+ document1_ = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .Build();
+ document2_ = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("email")
+ .Build();
+ document3_ = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("email")
+ .Build();
+
+ indexed_section_0 = "indexedSection0";
+ indexed_section_1 = "indexedSection1";
+ schema_ =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_0)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_1)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -95,10 +114,12 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
std::unique_ptr<DocumentStore> document_store_;
const Filesystem filesystem_;
const std::string test_dir_;
+ std::string indexed_section_0;
+ std::string indexed_section_1;
SchemaProto schema_;
- DocumentProto document_;
- const std::string indexed_property_ = "subject";
- int indexed_section_id_;
+ DocumentProto document1_;
+ DocumentProto document2_;
+ DocumentProto document3_;
FakeClock fake_clock_;
};
@@ -106,7 +127,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
PopulateMatchedTermsStats_IncludesHitWithMatchingSection) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Arbitrary section ids for the documents in the DocHitInfoIterators.
// Created to test correct section_id_mask behavior.
@@ -128,7 +149,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// get a result.
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/indexed_property_);
+ /*target_sections=*/{indexed_section_0});
std::vector<TermMatchInfo> matched_terms_stats;
section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
@@ -156,7 +177,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
DocHitInfoIteratorSectionRestrict filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), /*target_section=*/"");
+ schema_store_.get(), /*target_sections=*/{});
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -167,9 +188,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
- SectionIdMask section_id_mask = 1U << indexed_section_id_;
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
// Create a hit that was found in the indexed section
std::vector<DocHitInfo> doc_hit_infos = {
@@ -181,12 +202,99 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
// Filtering for the indexed section name should get a result
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
+ /*target_sections=*/{indexed_section_0});
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator),
ElementsAre(document_id));
}
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithMultipleMatchingSectionsWithMultipleSectionRestricts) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
+ section_id_mask |= 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1});
+
+ ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId0,
+ kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+ EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ Eq(section_id_mask));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithMultipleMatchingSectionsWithSingleSectionRestrict) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
+ section_id_mask |= 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for only one of the target sections
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_1});
+
+ ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+ EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ Eq(1U << kIndexedSectionId1));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithSingleMatchingSectionsWithMultiSectionRestrict) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1});
+
+ ICING_ASSERT_OK(section_restrict_iterator.Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+ EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ Eq(1U << kIndexedSectionId1));
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
// Create a hit with a document id that doesn't exist in the DocumentStore yet
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
@@ -197,7 +305,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
// Filtering for the indexed section name should get a result
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ /*target_sections=*/{""});
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -209,9 +317,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithWrongSectionName) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
- SectionIdMask section_id_mask = 1U << indexed_section_id_;
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
// Create a hit that was found in the indexed section
std::vector<DocHitInfo> doc_hit_infos = {
@@ -223,7 +331,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Filtering for the indexed section name should get a result
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- "some_section_name");
+ /*target_sections=*/{"some_section_name"});
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -235,7 +343,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithNoSectionIds) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Create a hit that doesn't exist in any sections, so it shouldn't match any
// section filters
@@ -247,7 +355,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
+ /*target_sections=*/{indexed_section_0});
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -259,7 +367,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithDifferentSectionId) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Anything that's not 0, which is the indexed property
SectionId not_matching_section_id = 2;
@@ -274,7 +382,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
+ /*target_sections=*/{indexed_section_0});
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
std::vector<TermMatchInfo> matched_terms_stats;
@@ -288,7 +396,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ /*target_sections=*/{""});
EXPECT_THAT(section_restrict_iterator.GetNumBlocksInspected(), Eq(5));
}
@@ -299,11 +407,102 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumLeafAdvanceCalls) {
DocHitInfoIteratorSectionRestrict section_restrict_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ /*target_sections=*/{""});
EXPECT_THAT(section_restrict_iterator.GetNumLeafAdvanceCalls(), Eq(6));
}
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ TrimSectionRestrictIterator_TwoLayer) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3_));
+
+ // 0 is the indexed property
+ SectionId matching_section_id = 0;
+ // Anything that's not 0, which is the indexed property
+ SectionId not_matching_section_id = 2;
+
+ // Build an iterator tree like:
+ // Restrict
+ // |
+ // AND
+ // / \
+ // [1, 1],[2, 2] [3, 2]
+ std::vector<DocHitInfo> left_infos = {
+ DocHitInfo(document_id1, 1U << matching_section_id),
+ DocHitInfo(document_id2, 1U << not_matching_section_id)};
+ std::vector<DocHitInfo> right_infos = {
+ DocHitInfo(document_id3, 1U << not_matching_section_id)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(left_infos);
+ std::unique_ptr<DocHitInfoIterator> right_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(right_infos, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iterator),
+ std::move(right_iterator));
+
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ {indexed_section_0});
+
+ // The trimmed tree.
+ // Restrict
+ // |
+ // [1, 1],[2, 2]
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocHitInfoIterator::TrimmedNode node,
+ std::move(section_restrict_iterator).TrimRightMostNode());
+
+ EXPECT_THAT(GetDocumentIds(node.iterator_.get()), ElementsAre(document_id1));
+ EXPECT_THAT(node.term_, Eq("term"));
+ EXPECT_THAT(node.term_start_index_, Eq(10));
+ EXPECT_THAT(node.target_section_, Eq(""));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+
+ // 0 is the indexed property
+ SectionId matching_section_id = 0;
+ // Anything that's not 0, which is the indexed property
+ SectionId not_matching_section_id = 2;
+
+ // Build an iterator tree like:
+ // Restrict
+ // |
+ // [1, 1],[2, 2]
+ std::vector<DocHitInfo> doc_infos = {
+ DocHitInfo(document_id1, 1U << matching_section_id),
+ DocHitInfo(document_id2, 1U << not_matching_section_id)};
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_infos, "term", 10);
+
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ {indexed_section_0});
+
+ // The trimmed tree has a null iterator but carries the target section.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocHitInfoIterator::TrimmedNode node,
+ std::move(section_restrict_iterator).TrimRightMostNode());
+
+ EXPECT_THAT(node.iterator_, testing::IsNull());
+ EXPECT_THAT(node.term_, Eq("term"));
+ EXPECT_THAT(node.term_start_index_, Eq(10));
+ EXPECT_THAT(node.target_section_, Eq(indexed_section_0));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index fe3a4b9..a77b91c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -80,8 +80,12 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
: doc_hit_infos_(std::move(doc_hit_infos)), term_(std::move(term)) {}
explicit DocHitInfoIteratorDummy(const std::vector<DocHitInfo>& doc_hit_infos,
- std::string term = "")
- : term_(std::move(term)) {
+ std::string term = "",
+ int term_start_index = 0,
+ int unnormalized_term_length = 0)
+ : term_(std::move(term)),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length) {
for (auto& doc_hit_info : doc_hit_infos) {
doc_hit_infos_.push_back(DocHitInfoTermFrequencyPair(doc_hit_info));
}
@@ -98,6 +102,12 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
"No more DocHitInfos in iterator");
}
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+ }
+
// Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
@@ -170,6 +180,8 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
int32_t num_leaf_advance_calls_ = 0;
std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos_;
std::string term_;
+ int term_start_index_;
+ int unnormalized_term_length_;
};
inline std::vector<DocumentId> GetDocumentIds(DocHitInfoIterator* iterator) {
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index b73b264..e1f06d0 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -62,6 +62,43 @@ struct TermMatchInfo {
// }
class DocHitInfoIterator {
public:
+ struct TrimmedNode {
+ // The query results; suggestions should only be searched within these
+ // documents.
+ std::unique_ptr<DocHitInfoIterator> iterator_;
+ // Term of the trimmed node, for which suggested strings will be generated.
+ std::string term_;
+ // The string in the query that indicates the target section to search for
+ // suggestions.
+ std::string target_section_;
+ // The start index of the current term in the given search query.
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query.
+ int unnormalized_term_length_;
+
+ TrimmedNode(std::unique_ptr<DocHitInfoIterator> iterator, std::string term,
+ int term_start_index, int unnormalized_term_length)
+ : iterator_(std::move(iterator)),
+ term_(term),
+ target_section_(""),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length) {}
+ };
+
+ // Trims the right-most iterator of the iterator tree.
+ // This supports search suggestions for the last term, which is the
+ // right-most node of the root iterator tree. Trimming the right-most node is
+ // only supported on the AND, AND_NARY, OR, OR_NARY, OR_LEAF and Filter
+ // iterators.
+ //
+ // After calling this method, this iterator is no longer usable. Please use
+ // the returned iterator.
+ // Returns:
+ //   the new iterator without the right-most child, if the right-most node
+ //   could be trimmed.
+ //   nullptr if the current iterator itself should be trimmed.
+ //   UNIMPLEMENTED if the right-most node is not supposed to be trimmed
+ //   (e.g. the NOT operator).
+ virtual libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && = 0;
+
virtual ~DocHitInfoIterator() = default;
// Returns:
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index b01f278..acf3b33 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -15,6 +15,7 @@
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include <array>
+#include <cstddef>
#include <cstdint>
#include <numeric>
@@ -73,6 +74,14 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorTermLite::TrimRightMostNode() && {
+ // Leaf iterator should trim itself.
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+}
+
libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
// Exact match only. All hits in lite lexicon are exact.
ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->GetTermId(term_));
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index bd8a6ee..873ea89 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -33,9 +33,13 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
: term_(term),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length),
lite_index_(lite_index),
cached_hits_idx_(-1),
term_id_codec_(term_id_codec),
@@ -45,6 +49,8 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override { return 0; }
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
@@ -91,6 +97,10 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
const std::string term_;
+ // The start index of the given term in the search query.
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query.
+ int unnormalized_term_length_;
LiteIndex* const lite_index_;
// Stores hits retrieved from the index. This may only be a subset of the hits
// that are present in the index. Current value pointed to by the Iterator is
@@ -111,9 +121,12 @@ class DocHitInfoIteratorTermLiteExact : public DocHitInfoIteratorTermLite {
explicit DocHitInfoIteratorTermLiteExact(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_id_mask,
bool need_hit_term_frequency)
: DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
+ term_start_index, unnormalized_term_length,
section_id_mask, need_hit_term_frequency) {}
std::string ToString() const override;
@@ -127,9 +140,12 @@ class DocHitInfoIteratorTermLitePrefix : public DocHitInfoIteratorTermLite {
explicit DocHitInfoIteratorTermLitePrefix(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_id_mask,
bool need_hit_term_frequency)
: DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
+ term_start_index, unnormalized_term_length,
section_id_mask, need_hit_term_frequency) {}
std::string ToString() const override;
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index c7255fd..916a14b 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -181,12 +181,13 @@ class LiteIndex {
uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) {
absl_ports::shared_lock l(&mutex_);
- return header_->cur_size();
+ return sizeLocked();
}
bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) {
- return size() >= (options_.hit_buffer_want_merge_bytes /
- sizeof(TermIdHitPair::Value));
+ absl_ports::shared_lock l(&mutex_);
+ return is_full() || sizeLocked() >= (options_.hit_buffer_want_merge_bytes /
+ sizeof(TermIdHitPair::Value));
}
class const_iterator {
@@ -325,6 +326,10 @@ class LiteIndex {
// Check if the hit buffer has reached its capacity.
bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_);
+ uint32_t sizeLocked() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size();
+ }
+
// Non-locking implementation for empty().
bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
return size_impl() == 0;
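The WantsMerge change above acquires the shared lock once and checks both the fullness condition and the size under it, delegating to a private locked helper instead of the public, self-locking size(). A minimal sketch of that pattern, assuming plain std::shared_mutex and a hypothetical Buffer class in place of LiteIndex and its absl_ports lock wrappers:

#include <cstdint>
#include <iostream>
#include <shared_mutex>

class Buffer {
 public:
  uint32_t size() const {
    std::shared_lock<std::shared_mutex> l(mutex_);
    return SizeLocked();
  }

  bool WantsMerge() const {
    // Taking the lock once and using the *Locked helpers avoids re-entering
    // size(), which would try to lock mutex_ a second time on this thread.
    std::shared_lock<std::shared_mutex> l(mutex_);
    return IsFullLocked() || SizeLocked() >= want_merge_elements_;
  }

 private:
  // REQUIRES: mutex_ held (shared or exclusive).
  uint32_t SizeLocked() const { return cur_size_; }
  bool IsFullLocked() const { return cur_size_ == capacity_; }

  mutable std::shared_mutex mutex_;
  uint32_t cur_size_ = 0;
  uint32_t capacity_ = 1024;
  uint32_t want_merge_elements_ = 512;
};

int main() {
  Buffer buffer;
  std::cout << "wants merge: " << (buffer.WantsMerge() ? "yes" : "no") << "\n";
  return 0;
}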
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index c3f52b1..5f141ed 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -135,7 +135,8 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
std::make_unique<DocHitInfoIteratorTermLiteExact>(
- term_id_codec_.get(), lite_index_.get(), term, kSectionIdMaskAll,
+ term_id_codec_.get(), lite_index_.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
/*need_hit_term_frequency=*/true);
ASSERT_THAT(iter->Advance(), IsOk());
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index b678d3f..8f0d3f5 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -81,6 +81,14 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorTermMain::TrimRightMostNode() && {
+ // Leaf iterator should trim itself.
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+}
+
libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
DocHitInfo last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index c57ce72..08a385c 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -33,9 +33,14 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
public:
explicit DocHitInfoIteratorTermMain(MainIndex* main_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
: term_(term),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length),
+ posting_list_accessor_(nullptr),
main_index_(main_index),
cached_doc_hit_infos_idx_(-1),
num_advance_calls_(0),
@@ -46,6 +51,8 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
int32_t GetNumBlocksInspected() const override {
return num_blocks_inspected_;
}
@@ -90,6 +97,11 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
const std::string term_;
+
+ // The start index of the given term in the search query.
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query.
+ int unnormalized_term_length_;
// The accessor of the posting list chain for the requested term.
std::unique_ptr<PostingListHitAccessor> posting_list_accessor_;
@@ -124,10 +136,13 @@ class DocHitInfoIteratorTermMainExact : public DocHitInfoIteratorTermMain {
public:
explicit DocHitInfoIteratorTermMainExact(MainIndex* main_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
- : DocHitInfoIteratorTermMain(main_index, term, section_restrict_mask,
- need_hit_term_frequency) {}
+ : DocHitInfoIteratorTermMain(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_restrict_mask, need_hit_term_frequency) {}
std::string ToString() const override;
@@ -139,10 +154,13 @@ class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain {
public:
explicit DocHitInfoIteratorTermMainPrefix(MainIndex* main_index,
const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
SectionIdMask section_restrict_mask,
bool need_hit_term_frequency)
- : DocHitInfoIteratorTermMain(main_index, term, section_restrict_mask,
- need_hit_term_frequency) {}
+ : DocHitInfoIteratorTermMain(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_restrict_mask, need_hit_term_frequency) {}
std::string ToString() const override;
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index 6fd63e2..e181330 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -161,7 +161,7 @@ class MainIndex {
if (main_lexicon_->Sync() && flash_index_storage_->PersistToDisk()) {
return libtextclassifier3::Status::OK;
}
- return absl_ports::InternalError("Unable to sync lite index components.");
+ return absl_ports::InternalError("Unable to sync main index components.");
}
DocumentId last_added_document_id() const {
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index 816d46e..ac724b0 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -52,18 +52,20 @@ std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
}
std::vector<DocHitInfo> GetExactHits(
- MainIndex* main_index, const std::string& term,
- SectionIdMask section_mask = kSectionIdMaskAll) {
+ MainIndex* main_index, int term_start_index, int unnormalized_term_length,
+ const std::string& term, SectionIdMask section_mask = kSectionIdMaskAll) {
auto iterator = std::make_unique<DocHitInfoIteratorTermMainExact>(
- main_index, term, section_mask, /*need_hit_term_frequency=*/true);
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_mask, /*need_hit_term_frequency=*/true);
return GetHits(std::move(iterator));
}
std::vector<DocHitInfo> GetPrefixHits(
- MainIndex* main_index, const std::string& term,
- SectionIdMask section_mask = kSectionIdMaskAll) {
+ MainIndex* main_index, int term_start_index, int unnormalized_term_length,
+ const std::string& term, SectionIdMask section_mask = kSectionIdMaskAll) {
auto iterator = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
- main_index, term, section_mask, /*need_hit_term_frequency=*/true);
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_mask, /*need_hit_term_frequency=*/true);
return GetHits(std::move(iterator));
}
@@ -271,9 +273,12 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
EXPECT_THAT(hits, IsEmpty());
- hits = GetPrefixHits(main_index.get(), "fo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
EXPECT_THAT(hits, IsEmpty());
// 3. Merge the index. The main index should contain "fool", "foot"
@@ -281,7 +286,8 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
// should not be present because it is not a branch point.
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list.
- hits = GetExactHits(main_index.get(), "foot");
+ hits = GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
// We should get hits for "foot" in doc1 and doc0
EXPECT_THAT(
hits,
@@ -292,7 +298,8 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
std::vector<SectionId>{doc0_hit.section_id()})));
// Get hits from a branching point posting list. "fo" should redirect to "foo"
- hits = GetPrefixHits(main_index.get(), "fo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
// We should get hits for "foot" in doc1 and "fool" in doc1. We shouldn't get
// the hits for "foot" in doc0 and "fool" in doc0 and doc2 because they
// weren't hits in prefix sections.
@@ -390,7 +397,9 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
// and "fall", a branch points for "fou" and backfill points for "fo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list the existed before the merge.
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
// We should get hits for "foot" in doc3, doc1 and doc0
EXPECT_THAT(
@@ -403,7 +412,8 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
EqualsDocHitInfo(doc0_hit.document_id(),
std::vector<SectionId>{doc0_hit.section_id()})));
// Get hits from backfill posting list.
- hits = GetPrefixHits(main_index.get(), "fo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
// We should get hits for "four" and "foul" in doc4 and hits for "foot" and
// "fool" in doc1. We shouldn't get the hits for "foot" in doc0 and doc3,
// "fool" in doc0 and doc2 or the hits for "four" and "foul" in doc4 because
@@ -455,7 +465,9 @@ TEST_F(MainIndexTest, ExactRetrievedInPrefixSearch) {
// 3. Merge the lite lexicon. The main lexicon should contain "foot" and
// "foo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
- std::vector<DocHitInfo> hits = GetPrefixHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
// We should get hits for "foo" in doc1 and doc0, but not in doc2 because it
// is not a prefix hit.
EXPECT_THAT(
@@ -504,7 +516,9 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
// 3. Merge the lite lexicon. The main lexicon should contain "foot" and
// "foo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
// We should get hits for "foo" in doc2 and doc1, but not in doc0 because it
// is not an exact hit.
@@ -554,7 +568,9 @@ TEST_F(MainIndexTest, SearchChainedPostingLists) {
// 3. Merge the lite index.
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits for all documents containing "foot" - which should be all of them.
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
EXPECT_THAT(hits, SizeIs(2048));
EXPECT_THAT(hits.front(),
@@ -607,11 +623,14 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
// and a backfill point for "foo".
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list the existed before the merge.
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
EXPECT_THAT(hits, IsEmpty());
// Get hits from backfill posting list.
- hits = GetPrefixHits(main_index.get(), "foo");
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
// We should get a hit for "fool" in doc0.
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
doc0_hit.document_id(),
@@ -642,7 +661,9 @@ TEST_F(MainIndexTest, OneHitInTheFirstPageForTwoPagesMainIndex) {
&icing_filesystem_));
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
- std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
ASSERT_THAT(hits, SizeIs(num_docs));
for (DocumentId document_id = num_docs - 1; document_id >= 0; --document_id) {
ASSERT_THAT(
diff --git a/icing/index/main/posting-list-hit-accessor.cc b/icing/index/main/posting-list-hit-accessor.cc
index ba27bb1..3d5476b 100644
--- a/icing/index/main/posting-list-hit-accessor.cc
+++ b/icing/index/main/posting-list-hit-accessor.cc
@@ -20,7 +20,6 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/main/posting-list-hit-serializer.h"
@@ -32,24 +31,19 @@ namespace lib {
libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
PostingListHitAccessor::Create(FlashIndexStorage *storage,
PostingListHitSerializer *serializer) {
- uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- storage->block_size(), serializer->GetDataTypeBytes());
- std::unique_ptr<uint8_t[]> posting_list_buffer_array =
- std::make_unique<uint8_t[]>(max_posting_list_bytes);
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list_buffer,
- PostingListUsed::CreateFromUnitializedRegion(
- serializer, posting_list_buffer_array.get(), max_posting_list_bytes));
+ uint32_t max_posting_list_bytes = storage->max_posting_list_bytes();
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
return std::unique_ptr<PostingListHitAccessor>(new PostingListHitAccessor(
- storage, serializer, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer)));
+ storage, serializer, std::move(in_memory_posting_list)));
}
libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
PostingListHitAccessor::CreateFromExisting(
FlashIndexStorage *storage, PostingListHitSerializer *serializer,
PostingListIdentifier existing_posting_list_id) {
- // Our posting_list_buffer_ will start as empty.
+ // Our in_memory_posting_list_ will start as empty.
ICING_ASSIGN_OR_RETURN(std::unique_ptr<PostingListHitAccessor> pl_accessor,
Create(storage, serializer));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
@@ -73,20 +67,23 @@ PostingListHitAccessor::GetNextHitsBatch() {
ICING_ASSIGN_OR_RETURN(
std::vector<Hit> batch,
serializer_->GetHits(&preexisting_posting_list_->posting_list));
- uint32_t next_block_index;
+ uint32_t next_block_index = kInvalidBlockIndex;
// Posting lists will only be chained when they are max-sized, in which case
- // block.next_block_index() will point to the next block for the next posting
- // list. Otherwise, block.next_block_index() can be kInvalidBlockIndex or be
- // used to point to the next free list block, which is not relevant here.
- if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
- next_block_index = preexisting_posting_list_->block.next_block_index();
- } else {
- next_block_index = kInvalidBlockIndex;
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
}
+
if (next_block_index != kInvalidBlockIndex) {
+ // Since we only have to deal with the next block for max-sized posting
+ // list blocks, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
PostingListIdentifier next_posting_list_id(
next_block_index, /*posting_list_index=*/0,
- preexisting_posting_list_->block.posting_list_index_bits());
+ /*posting_list_index_bits=*/BitsToStore(1));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
storage_->GetPostingList(next_posting_list_id));
preexisting_posting_list_ =
@@ -101,7 +98,7 @@ PostingListHitAccessor::GetNextHitsBatch() {
libtextclassifier3::Status PostingListHitAccessor::PrependHit(const Hit &hit) {
PostingListUsed &active_pl = (preexisting_posting_list_ != nullptr)
? preexisting_posting_list_->posting_list
- : posting_list_buffer_;
+ : in_memory_posting_list_;
libtextclassifier3::Status status = serializer_->PrependHit(&active_pl, hit);
if (!absl_ports::IsResourceExhausted(status)) {
return status;
@@ -110,16 +107,16 @@ libtextclassifier3::Status PostingListHitAccessor::PrependHit(const Hit &hit) {
// we need to either move those hits to a larger posting list or flush this
// posting list and create another max-sized posting list in the chain.
if (preexisting_posting_list_ != nullptr) {
- FlushPreexistingPostingList();
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
} else {
ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
}
- // Re-add hit. Should always fit since we just cleared posting_list_buffer_.
- // It's fine to explicitly reference posting_list_buffer_ here because there's
- // no way of reaching this line while preexisting_posting_list_ is still in
- // use.
- return serializer_->PrependHit(&posting_list_buffer_, hit);
+ // Re-add hit. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependHit(&in_memory_posting_list_, hit);
}
} // namespace lib
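The PrependHit comments above describe a flush-then-retry pattern: when the active posting list reports RESOURCE_EXHAUSTED, its contents are flushed and the hit is re-added to the freshly emptied in-memory posting list, where it is guaranteed to fit. A self-contained sketch of that pattern, with a hypothetical FixedList type standing in for the posting list rather than the Icing accessor API:

#include <cstddef>
#include <cstdio>
#include <vector>

// A bounded list standing in for the active posting list: Prepend() reports
// "resource exhausted" (false) once the capacity is reached.
struct FixedList {
  explicit FixedList(std::size_t cap) : cap_(cap) {}
  bool Prepend(int v) {
    if (data_.size() >= cap_) return false;
    data_.insert(data_.begin(), v);
    return true;
  }
  std::vector<int> Flush() {  // hand off the contents and reset to empty
    std::vector<int> out = std::move(data_);
    data_.clear();
    return out;
  }
  std::size_t cap_;
  std::vector<int> data_;
};

// Flush-then-retry: if the first Prepend fails, flush the active list and
// re-add the value, which must fit in the freshly emptied list.
bool PrependWithFlush(FixedList& active, std::vector<int>& flushed, int v) {
  if (active.Prepend(v)) return true;
  std::vector<int> batch = active.Flush();
  flushed.insert(flushed.end(), batch.begin(), batch.end());
  return active.Prepend(v);
}

int main() {
  FixedList active(/*cap=*/2);
  std::vector<int> flushed;
  for (int v : {1, 2, 3}) PrependWithFlush(active, flushed, v);
  std::printf("flushed=%zu active=%zu\n", flushed.size(), active.data_.size());
  return 0;
}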
diff --git a/icing/index/main/posting-list-hit-accessor.h b/icing/index/main/posting-list-hit-accessor.h
index e2c8dbe..7b72437 100644
--- a/icing/index/main/posting-list-hit-accessor.h
+++ b/icing/index/main/posting-list-hit-accessor.h
@@ -86,12 +86,10 @@ class PostingListHitAccessor : public PostingListAccessor {
libtextclassifier3::Status PrependHit(const Hit& hit);
private:
- explicit PostingListHitAccessor(
- FlashIndexStorage* storage, PostingListHitSerializer* serializer,
- std::unique_ptr<uint8_t[]> posting_list_buffer_array,
- PostingListUsed posting_list_buffer)
- : PostingListAccessor(storage, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer)),
+ explicit PostingListHitAccessor(FlashIndexStorage* storage,
+ PostingListHitSerializer* serializer,
+ PostingListUsed in_memory_posting_list)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
serializer_(serializer) {}
PostingListHitSerializer* serializer_; // Does not own.
diff --git a/icing/index/main/posting-list-hit-accessor_test.cc b/icing/index/main/posting-list-hit-accessor_test.cc
index 8f4ca42..1127814 100644
--- a/icing/index/main/posting-list-hit-accessor_test.cc
+++ b/icing/index/main/posting-list-hit-accessor_test.cc
@@ -93,7 +93,7 @@ TEST_F(PostingListHitAccessorTest, HitsAddAndRetrieveProperly) {
flash_index_storage_->GetPostingList(result.id));
EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
- EXPECT_THAT(pl_holder.block.next_block_index(), Eq(kInvalidBlockIndex));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
}
TEST_F(PostingListHitAccessorTest, PreexistingPLKeepOnSameBlock) {
@@ -223,7 +223,7 @@ TEST_F(PostingListHitAccessorTest, MultiBlockChainsBlocksProperly) {
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
@@ -290,7 +290,7 @@ TEST_F(PostingListHitAccessorTest, PreexistingMultiBlockReusesBlocksProperly) {
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
diff --git a/icing/index/main/posting-list-hit-serializer_test.cc b/icing/index/main/posting-list-hit-serializer_test.cc
index 5d96ad9..ffd8166 100644
--- a/icing/index/main/posting-list-hit-serializer_test.cc
+++ b/icing/index/main/posting-list-hit-serializer_test.cc
@@ -53,11 +53,9 @@ TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
static const int kNumHits = 2551;
static const size_t kHitsSize = kNumHits * sizeof(Hit);
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), kHitsSize));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// Make used.
Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
@@ -102,11 +100,9 @@ TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
PostingListHitSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// Fill up the compressed region.
// Transitions:
@@ -171,14 +167,10 @@ TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
TEST(PostingListHitSerializerTest, PostingListUsedMinSize) {
PostingListHitSerializer serializer;
- std::unique_ptr<char[]> hits_buf =
- std::make_unique<char[]>(serializer.GetMinPostingListSize());
-
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()),
- serializer.GetMinPostingListSize()));
+ &serializer, serializer.GetMinPostingListSize()));
// PL State: EMPTY
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
@@ -220,15 +212,11 @@ TEST(PostingListHitSerializerTest,
PostingListPrependHitArrayMinSizePostingList) {
PostingListHitSerializer serializer;
- constexpr int kFinalSize = 1025;
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kFinalSize);
-
// Min Size = 10
int size = serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<HitElt> hits_in;
hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
@@ -270,11 +258,9 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
// Size = 30
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<HitElt> hits_in;
hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
@@ -442,8 +428,6 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
static constexpr size_t kHitsSize =
((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5;
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
-
// Create an array with one too many hits
std::vector<Hit> hits_in_too_many =
CreateHits(kNumHits + 1, /*desired_byte_length=*/1);
@@ -454,8 +438,7 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()),
- serializer.GetMinPostingListSize()));
+ &serializer, serializer.GetMinPostingListSize()));
// PrependHitArray should fail because hit_elts_in_too_many is far too large
// for the minimum size pl.
@@ -467,8 +450,7 @@ TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
ICING_ASSERT_OK_AND_ASSIGN(
pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf.get()), kHitsSize));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// PrependHitArray should fail because hit_elts_in_too_many is one hit too
// large for this pl.
num_could_fit = serializer.PrependHitArray<HitElt, HitElt::get_hit>(
@@ -483,10 +465,9 @@ TEST(PostingListHitSerializerTest,
PostingListHitSerializer serializer;
const uint32_t pl_size = 3 * sizeof(Hit);
- char hits_buf[pl_size];
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, hits_buf, pl_size));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue - 1, 0)));
uint32_t bytes_used = serializer.GetBytesUsed(&pl);
// Status not full.
@@ -503,10 +484,10 @@ TEST(PostingListHitSerializerTest,
TEST(PostingListHitSerializerTest, DeltaOverflow) {
PostingListHitSerializer serializer;
- char hits_buf[1000];
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, hits_buf, 4 * sizeof(Hit)));
+ const uint32_t pl_size = 4 * sizeof(Hit);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
static const Hit::Value kOverflow[4] = {
Hit::kInvalidValue >> 2,
@@ -521,8 +502,8 @@ TEST(PostingListHitSerializerTest, DeltaOverflow) {
}
// Cannot fit 4 overflow values.
- ICING_ASSERT_OK_AND_ASSIGN(pl, PostingListUsed::CreateFromUnitializedRegion(
- &serializer, hits_buf, 4 * sizeof(Hit)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[3])));
ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[2])));
@@ -536,22 +517,18 @@ TEST(PostingListHitSerializerTest, MoveFrom) {
PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -568,11 +545,9 @@ TEST(PostingListHitSerializerTest, MoveFromNullArgumentReturnsInvalidArgument) {
PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
@@ -589,22 +564,18 @@ TEST(PostingListHitSerializerTest,
PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -613,7 +584,7 @@ TEST(PostingListHitSerializerTest,
// Write invalid hits to the beginning of pl_used1 to make it invalid.
Hit invalid_hit;
- Hit *first_hit = reinterpret_cast<Hit *>(hits_buf1.get());
+ Hit *first_hit = reinterpret_cast<Hit *>(pl_used1.posting_list_buffer());
*first_hit = invalid_hit;
++first_hit;
*first_hit = invalid_hit;
@@ -628,22 +599,18 @@ TEST(PostingListHitSerializerTest,
PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -652,7 +619,7 @@ TEST(PostingListHitSerializerTest,
// Write invalid hits to the beginning of pl_used2 to make it invalid.
Hit invalid_hit;
- Hit *first_hit = reinterpret_cast<Hit *>(hits_buf2.get());
+ Hit *first_hit = reinterpret_cast<Hit *>(pl_used2.posting_list_buffer());
*first_hit = invalid_hit;
++first_hit;
*first_hit = invalid_hit;
@@ -666,24 +633,19 @@ TEST(PostingListHitSerializerTest, MoveToPostingListTooSmall) {
PostingListHitSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
- std::unique_ptr<char[]> hits_buf2 =
- std::make_unique<char[]>(serializer.GetMinPostingListSize());
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf2.get()),
- serializer.GetMinPostingListSize()));
+ &serializer, serializer.GetMinPostingListSize()));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
@@ -702,11 +664,9 @@ TEST(PostingListHitSerializerTest, PopHitsWithScores) {
PostingListHitSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void *>(hits_buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// This posting list is 20-bytes. Create four hits that will have deltas of
// two bytes each and all of whom will have a non-default score. This posting
diff --git a/icing/index/numeric/doc-hit-info-iterator-numeric.h b/icing/index/numeric/doc-hit-info-iterator-numeric.h
index 1bfd193..bf990d1 100644
--- a/icing/index/numeric/doc-hit-info-iterator-numeric.h
+++ b/icing/index/numeric/doc-hit-info-iterator-numeric.h
@@ -20,6 +20,7 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/util/status-macros.h"
@@ -35,12 +36,23 @@ class DocHitInfoIteratorNumeric : public DocHitInfoIterator {
: numeric_index_iter_(std::move(numeric_index_iter)) {}
libtextclassifier3::Status Advance() override {
+ // If the query property path doesn't exist (i.e. the storage doesn't
+ // exist), then numeric_index_iter_ will be nullptr.
+ if (numeric_index_iter_ == nullptr) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
ICING_RETURN_IF_ERROR(numeric_index_iter_->Advance());
doc_hit_info_ = numeric_index_iter_->GetDocHitInfo();
return libtextclassifier3::Status::OK;
}
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ return absl_ports::UnimplementedError(
+ "Cannot trim right most node in numeric operator.");
+ }
+
int32_t GetNumBlocksInspected() const override { return 0; }
int32_t GetNumLeafAdvanceCalls() const override { return 0; }
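The nullptr check added to Advance() above lets a missing property path behave like an empty result set rather than an error. A tiny standalone sketch of that null-object style, using hypothetical InnerIter/NumericIter types:

#include <cstddef>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

// Inner iterator over matching document ids for an existing property path.
struct InnerIter {
  explicit InnerIter(std::vector<int> d) : docs(std::move(d)) {}
  bool Advance() { return idx++ < docs.size(); }
  std::vector<int> docs;
  std::size_t idx = 0;
};

// Wrapper that treats a missing property path (nullptr inner iterator) as an
// empty result set instead of surfacing an error to the caller.
struct NumericIter {
  explicit NumericIter(std::unique_ptr<InnerIter> in) : inner(std::move(in)) {}
  bool Advance() {
    if (inner == nullptr) return false;  // unknown property path: no hits
    return inner->Advance();
  }
  std::unique_ptr<InnerIter> inner;
};

int main() {
  NumericIter empty(nullptr);  // property path not present in the index
  std::cout << (empty.Advance() ? "hit" : "end of iterator") << "\n";
  return 0;
}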
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index 1b7b5ae..164866c 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -72,11 +72,27 @@ class DummyNumericIndex : public NumericIndex<T> {
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
std::string_view property_path, T key_lower, T key_upper) const override;
- libtextclassifier3::Status Reset() override {
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ libtextclassifier3::Status Clear() override {
storage_.clear();
+ last_added_document_id_ = kInvalidDocumentId;
return libtextclassifier3::Status::OK;
}
+ DocumentId last_added_document_id() const override {
+ return last_added_document_id_;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ if (last_added_document_id_ == kInvalidDocumentId ||
+ document_id > last_added_document_id_) {
+ last_added_document_id_ = document_id;
+ }
+ }
+
private:
class Editor : public NumericIndex<T>::Editor {
public:
@@ -95,12 +111,12 @@ class DummyNumericIndex : public NumericIndex<T> {
return libtextclassifier3::Status::OK;
}
- libtextclassifier3::Status IndexAllBufferedKeys() override;
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
private:
std::unordered_set<T> seen_keys_;
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>&
- storage_;
+ storage_; // Does not own.
};
class Iterator : public NumericIndex<T>::Iterator {
@@ -164,7 +180,8 @@ class DummyNumericIndex : public NumericIndex<T> {
explicit DummyNumericIndex(const Filesystem& filesystem,
std::string&& working_path)
: NumericIndex<T>(filesystem, std::move(working_path),
- PersistentStorage::WorkingPathType::kDummy) {}
+ PersistentStorage::WorkingPathType::kDummy),
+ last_added_document_id_(kInvalidDocumentId) {}
libtextclassifier3::Status PersistStoragesToDisk() override {
return libtextclassifier3::Status::OK;
@@ -187,11 +204,12 @@ class DummyNumericIndex : public NumericIndex<T> {
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>> storage_;
PersistentStorage::Crcs dummy_crcs_;
+ DocumentId last_added_document_id_;
};
template <typename T>
libtextclassifier3::Status
-DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() {
+DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() && {
auto property_map_iter = storage_.find(this->property_path_);
if (property_map_iter == storage_.end()) {
const auto& [inserted_iter, insert_result] =
@@ -223,7 +241,7 @@ DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() {
template <typename T>
libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
if (pq_.empty()) {
- return absl_ports::OutOfRangeError("End of iterator");
+ return absl_ports::ResourceExhaustedError("End of iterator");
}
DocumentId document_id = pq_.top().GetCurrentBasicHit().document_id();
@@ -255,8 +273,8 @@ DummyNumericIndex<T>::GetIterator(std::string_view property_path, T key_lower,
auto property_map_iter = storage_.find(std::string(property_path));
if (property_map_iter == storage_.end()) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Property \"", property_path, "\" not found"));
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<T>>(nullptr);
}
std::vector<typename Iterator::BucketInfo> bucket_info_vec;
@@ -272,6 +290,38 @@ DummyNumericIndex<T>::GetIterator(std::string_view property_path, T key_lower,
std::move(bucket_info_vec)));
}
+template <typename T>
+libtextclassifier3::Status DummyNumericIndex<T>::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>
+ new_storage;
+
+ for (const auto& [property_path, old_property_map] : storage_) {
+ std::map<T, std::vector<BasicHit>> new_property_map;
+ for (const auto& [key, hits] : old_property_map) {
+ for (const BasicHit& hit : hits) {
+ DocumentId old_doc_id = hit.document_id();
+ if (old_doc_id >= document_id_old_to_new.size() ||
+ document_id_old_to_new[old_doc_id] == kInvalidDocumentId) {
+ continue;
+ }
+
+ new_property_map[key].push_back(
+ BasicHit(hit.section_id(), document_id_old_to_new[old_doc_id]));
+ }
+ }
+
+ if (!new_property_map.empty()) {
+ new_storage[property_path] = std::move(new_property_map);
+ }
+ }
+
+ storage_ = std::move(new_storage);
+ last_added_document_id_ = new_last_added_document_id;
+ return libtextclassifier3::Status::OK;
+}
+
} // namespace lib
} // namespace icing
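The DummyNumericIndex::Optimize() implementation above rewrites every hit's document id through document_id_old_to_new, dropping hits whose documents were deleted or expired. A self-contained sketch of the same remapping over a plain std::map-based storage (hypothetical types; bare uint32_t ids stand in for BasicHit):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

constexpr uint32_t kInvalid = UINT32_MAX;

using Storage = std::map<std::string, std::map<int64_t, std::vector<uint32_t>>>;

Storage Optimize(const Storage& old_storage,
                 const std::vector<uint32_t>& old_to_new) {
  Storage new_storage;
  for (const auto& [property, keys] : old_storage) {
    std::map<int64_t, std::vector<uint32_t>> new_keys;
    for (const auto& [key, doc_ids] : keys) {
      for (uint32_t old_id : doc_ids) {
        if (old_id >= old_to_new.size() || old_to_new[old_id] == kInvalid) {
          continue;  // document was deleted or expired: drop the hit
        }
        new_keys[key].push_back(old_to_new[old_id]);  // rewrite to the new id
      }
    }
    // Property maps that lost all of their hits are not carried over.
    if (!new_keys.empty()) new_storage[property] = std::move(new_keys);
  }
  return new_storage;
}

int main() {
  Storage storage = {{"price", {{10, {0, 1, 2}}}}};
  // doc 1 was deleted; docs 0 and 2 are compacted to ids 0 and 1.
  Storage optimized = Optimize(storage, {0, kInvalid, 1});
  std::cout << optimized["price"][10].size() << " hits remain\n";  // prints 2
  return 0;
}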
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index 0233b38..22ef8bd 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -15,6 +15,8 @@
#include "icing/index/numeric/integer-index-storage.h"
#include <algorithm>
+#include <cstdint>
+#include <functional>
#include <limits>
#include <memory>
#include <queue>
@@ -119,7 +121,8 @@ class BucketPostingListIterator {
//
// Returns:
// - OK on success
- // - OUT_OF_RANGE_ERROR if reaching the end (i.e. no more relevant data)
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
// - Any other PostingListIntegerIndexAccessor errors
libtextclassifier3::Status AdvanceAndFilter(int64_t query_key_lower,
int64_t query_key_upper) {
@@ -159,7 +162,7 @@ class BucketPostingListIterator {
curr_ = cached_batch_integer_index_data_.cbegin();
if (cached_batch_integer_index_data_.empty()) {
- return absl_ports::OutOfRangeError("End of iterator");
+ return absl_ports::ResourceExhaustedError("End of iterator");
}
return libtextclassifier3::Status::OK;
@@ -212,7 +215,8 @@ class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
//
// Returns:
// - OK on success
- // - OUT_OF_RANGE_ERROR if reaching the end (i.e. no more relevant data)
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
// - Any BucketPostingListIterator errors
libtextclassifier3::Status Advance() override;
@@ -243,7 +247,7 @@ class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
if (pq_.empty()) {
- return absl_ports::OutOfRangeError("End of iterator");
+ return absl_ports::ResourceExhaustedError("End of iterator");
}
DocumentId document_id = pq_.top()->GetCurrentBasicHit().document_id();
@@ -325,8 +329,9 @@ IntegerIndexStorage::Create(
!filesystem.FileExists(
GetFlashIndexStorageFilePath(working_path).c_str())) {
// Discard working_path if any of them is missing, and reinitialize.
- ICING_RETURN_IF_ERROR(
- PersistentStorage::Discard(filesystem, working_path, kWorkingPathType));
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
return InitializeNewFiles(filesystem, std::move(working_path),
std::move(options), posting_list_serializer);
}
@@ -509,6 +514,8 @@ libtextclassifier3::Status IntegerIndexStorage::AddKeys(
// length of the unsorted bucket array exceeds the threshold.
// TODO(b/259743562): [Optimization 1] implement merge
+ info().num_data += new_keys.size();
+
return libtextclassifier3::Status::OK;
}
@@ -569,6 +576,98 @@ IntegerIndexStorage::GetIterator(int64_t query_key_lower,
query_key_lower, query_key_upper, std::move(bucket_pl_iters)));
}
+libtextclassifier3::Status IntegerIndexStorage::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndexStorage* new_storage) const {
+ // Discard all pre-existing buckets in new_storage since we will append newly
+ // merged buckets gradually into new_storage.
+ if (new_storage->sorted_buckets_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(new_storage->sorted_buckets_->TruncateTo(0));
+ }
+ if (new_storage->unsorted_buckets_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(new_storage->unsorted_buckets_->TruncateTo(0));
+ }
+
+ // "Reference sort" the original storage buckets.
+ std::vector<std::reference_wrapper<const Bucket>> temp_buckets;
+ temp_buckets.reserve(sorted_buckets_->num_elements() +
+ unsorted_buckets_->num_elements());
+ temp_buckets.insert(
+ temp_buckets.end(), sorted_buckets_->array(),
+ sorted_buckets_->array() + sorted_buckets_->num_elements());
+ temp_buckets.insert(
+ temp_buckets.end(), unsorted_buckets_->array(),
+ unsorted_buckets_->array() + unsorted_buckets_->num_elements());
+ std::sort(temp_buckets.begin(), temp_buckets.end(),
+ [](const std::reference_wrapper<const Bucket>& lhs,
+ const std::reference_wrapper<const Bucket>& rhs) -> bool {
+ return lhs.get() < rhs.get();
+ });
+
+ int64_t curr_key_lower = std::numeric_limits<int64_t>::min();
+ int64_t curr_key_upper = std::numeric_limits<int64_t>::min();
+ std::vector<IntegerIndexData> accumulated_data;
+ for (const std::reference_wrapper<const Bucket>& bucket_ref : temp_buckets) {
+ // Read all data from the bucket.
+ std::vector<IntegerIndexData> new_data;
+ if (bucket_ref.get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> old_pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket_ref.get().posting_list_identifier()));
+
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch_old_data,
+ old_pl_accessor->GetNextDataBatch());
+ while (!batch_old_data.empty()) {
+ for (const IntegerIndexData& old_data : batch_old_data) {
+ DocumentId new_document_id =
+ old_data.basic_hit().document_id() < document_id_old_to_new.size()
+ ? document_id_old_to_new[old_data.basic_hit().document_id()]
+ : kInvalidDocumentId;
+ // Transfer the document id of the hit if the document is not deleted
+ // or outdated.
+ if (new_document_id != kInvalidDocumentId) {
+ new_data.push_back(
+ IntegerIndexData(old_data.basic_hit().section_id(),
+ new_document_id, old_data.key()));
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(batch_old_data,
+ old_pl_accessor->GetNextDataBatch());
+ }
+ }
+
+ // Decide whether:
+ // - Flush accumulated_data and create a new bucket for them.
+ // - OR merge new_data into accumulated_data and go to the next round.
+ if (!accumulated_data.empty() && accumulated_data.size() + new_data.size() >
+ kNumDataThresholdForBucketMerge) {
+ // TODO(b/259743562): [Optimization 3] adjust upper bound to fit more data
+ // from new_data to accumulated_data.
+ ICING_RETURN_IF_ERROR(FlushDataIntoNewSortedBucket(
+ curr_key_lower, curr_key_upper, std::move(accumulated_data),
+ new_storage));
+
+ curr_key_lower = bucket_ref.get().key_lower();
+ accumulated_data = std::move(new_data);
+ } else {
+ // We can just append to accumulated data because
+ // FlushDataIntoNewSortedBucket will take care of sorting the contents.
+ std::move(new_data.begin(), new_data.end(),
+ std::back_inserter(accumulated_data));
+ }
+ curr_key_upper = bucket_ref.get().key_upper();
+ }
+
+ // Add the last round of bucket.
+ ICING_RETURN_IF_ERROR(
+ FlushDataIntoNewSortedBucket(curr_key_lower, curr_key_upper,
+ std::move(accumulated_data), new_storage));
+
+ return libtextclassifier3::Status::OK;
+}
+
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
IntegerIndexStorage::InitializeNewFiles(
const Filesystem& filesystem, std::string&& working_path, Options&& options,
@@ -665,7 +764,7 @@ IntegerIndexStorage::InitializeNewFiles(
// Initialize info content by writing mapped memory directly.
Info& info_ref = new_integer_index_storage->info();
info_ref.magic = Info::kMagic;
- info_ref.num_keys = 0;
+ info_ref.num_data = 0;
// Initialize new PersistentStorage. The initial checksums will be computed
// and set via InitializeNewStorage.
ICING_RETURN_IF_ERROR(new_integer_index_storage->InitializeNewStorage());
@@ -736,6 +835,40 @@ IntegerIndexStorage::InitializeExistingFiles(
return integer_index_storage;
}
+/* static */ libtextclassifier3::Status
+IntegerIndexStorage::FlushDataIntoNewSortedBucket(
+ int64_t key_lower, int64_t key_upper, std::vector<IntegerIndexData>&& data,
+ IntegerIndexStorage* storage) {
+ if (data.empty()) {
+ return storage->sorted_buckets_->Append(
+ Bucket(key_lower, key_upper, PostingListIdentifier::kInvalid));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> new_pl_accessor,
+ PostingListIntegerIndexAccessor::Create(
+ storage->flash_index_storage_.get(),
+ storage->posting_list_serializer_));
+
+ std::sort(data.begin(), data.end());
+ for (auto itr = data.rbegin(); itr != data.rend(); ++itr) {
+ ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(*itr));
+ }
+
+ PostingListAccessor::FinalizeResult result =
+ std::move(*new_pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list");
+ }
+
+ storage->info().num_data += data.size();
+ return storage->sorted_buckets_->Append(
+ Bucket(key_lower, key_upper, result.id));
+}
+
libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk() {
ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
@@ -808,5 +941,58 @@ IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
return std::vector<Bucket>();
}
+
+libtextclassifier3::Status IntegerIndexStorage::SortBuckets() {
+ if (unsorted_buckets_->num_elements() == 0) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ int32_t sorted_len = sorted_buckets_->num_elements();
+ int32_t unsorted_len = unsorted_buckets_->num_elements();
+ if (sorted_len > FileBackedVector<Bucket>::kMaxNumElements - unsorted_len) {
+ return absl_ports::OutOfRangeError(
+ "Sorted buckets length exceeds the limit after merging");
+ }
+
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Allocate(unsorted_len));
+
+ // Sort unsorted_buckets_.
+ ICING_RETURN_IF_ERROR(
+ unsorted_buckets_->Sort(/*begin_idx=*/0, /*end_idx=*/unsorted_len));
+
+ // Merge unsorted_buckets_ into sorted_buckets_ and clear unsorted_buckets_.
+ // Note that we could have used std::sort + std::inplace_merge, but it is more
+ // complicated to deal with FileBackedVector SetDirty logic, so implement our
+ // own merging with FileBackedVector methods.
+ //
+ // Merge buckets from the back. This could save some iterations and avoid
+ // setting dirty for unchanged elements of the original sorted segments.
+ // For example, we can avoid setting dirty for elements [1, 2, 3, 5] for the
+ // following sorted/unsorted data:
+ // - sorted: [1, 2, 3, 5, 8, 13, _, _, _, _]
+ // - unsorted: [6, 10, 14, 15]
+ int32_t sorted_write_idx = sorted_len + unsorted_len - 1;
+ int32_t sorted_curr_idx = sorted_len - 1;
+ int32_t unsorted_curr_idx = unsorted_len - 1;
+ while (unsorted_curr_idx >= 0) {
+ if (sorted_curr_idx >= 0 && unsorted_buckets_->array()[unsorted_curr_idx] <
+ sorted_buckets_->array()[sorted_curr_idx]) {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Set(
+ sorted_write_idx, sorted_buckets_->array()[sorted_curr_idx]));
+ --sorted_curr_idx;
+
+ } else {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Set(
+ sorted_write_idx, unsorted_buckets_->array()[unsorted_curr_idx]));
+ --unsorted_curr_idx;
+ }
+ --sorted_write_idx;
+ }
+
+ ICING_RETURN_IF_ERROR(unsorted_buckets_->TruncateTo(0));
+
+ return libtextclassifier3::Status::OK;
+}
+
} // namespace lib
} // namespace icing
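SortBuckets() above merges the sorted and (freshly sorted) unsorted bucket arrays in place from the back, so elements of the sorted prefix that do not move are never rewritten. A standalone sketch of that backward merge over std::vector<int> (values stand in for Bucket, resize() for FileBackedVector::Allocate, clear() for TruncateTo):

#include <algorithm>
#include <iostream>
#include <vector>

void MergeFromBack(std::vector<int>& sorted, std::vector<int>& unsorted) {
  int sorted_len = static_cast<int>(sorted.size());
  int unsorted_len = static_cast<int>(unsorted.size());
  std::sort(unsorted.begin(), unsorted.end());
  sorted.resize(sorted_len + unsorted_len);  // mirrors Allocate(unsorted_len)

  int write = sorted_len + unsorted_len - 1;
  int i = sorted_len - 1;    // last element of the original sorted run
  int j = unsorted_len - 1;  // last element of the (now sorted) unsorted run
  while (j >= 0) {
    if (i >= 0 && sorted[i] > unsorted[j]) {
      sorted[write--] = sorted[i--];
    } else {
      sorted[write--] = unsorted[j--];
    }
  }
  // Elements sorted[0..i] are already in their final positions and were never
  // touched, which is the "avoid setting dirty" property described above.
  unsorted.clear();  // mirrors TruncateTo(0)
}

int main() {
  std::vector<int> sorted = {1, 2, 3, 5, 8, 13};
  std::vector<int> unsorted = {14, 6, 10, 15};
  MergeFromBack(sorted, unsorted);
  for (int v : sorted) std::cout << v << ' ';  // 1 2 3 5 6 8 10 13 14 15
  std::cout << '\n';
  return 0;
}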
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
index bef8282..be0add9 100644
--- a/icing/index/numeric/integer-index-storage.h
+++ b/icing/index/numeric/integer-index-storage.h
@@ -77,7 +77,7 @@ class IntegerIndexStorage : public PersistentStorage {
static constexpr int32_t kMagic = 0xc4bf0ccc;
int32_t magic;
- int32_t num_keys;
+ int32_t num_data;
Crc32 ComputeChecksum() const {
return Crc32(
@@ -176,6 +176,14 @@ class IntegerIndexStorage : public PersistentStorage {
WorkingPathType::kDirectory;
static constexpr std::string_view kFilePrefix = "integer_index_storage";
+ // Threshold on the # of data for bucket merging. If the total # of data of
+ // adjacent buckets exceeds this value, then flush the accumulated data.
+ // Otherwise merge the buckets and their data.
+ //
+ // Calculated by: 0.7 * (kMaxPostingListSize / sizeof(IntegerIndexData)),
+ // where kMaxPostingListSize = (kPageSize - sizeof(IndexBlock::BlockHeader)).
+ static constexpr int32_t kNumDataThresholdForBucketMerge = 240;
+
// Creates a new IntegerIndexStorage instance to index integers (for a single
// property). If any of the underlying file is missing, then delete the whole
// working_path and (re)initialize with new ones. Otherwise initialize and
@@ -258,6 +266,26 @@ class IntegerIndexStorage : public PersistentStorage {
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
int64_t query_key_lower, int64_t query_key_upper) const;
+ // Transfers integer index data from the current storage to new_storage and
+ // optimizes buckets (for new_storage only), i.e. merges adjacent buckets if
+ // the total # of data among them is less than or equal to
+ // kNumDataThresholdForBucketMerge.
+ //
+ // REQUIRES: new_storage should be a newly created storage instance, i.e. it
+ // should contain no data. Otherwise, existing data and posting lists won't
+ // be freed and space will be wasted.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if sorted buckets length exceeds the limit after
+ // merging
+ // - INTERNAL_ERROR on IO error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndexStorage* new_storage) const;
+
+ int32_t num_data() const { return info().num_data; }
+
private:
explicit IntegerIndexStorage(
const Filesystem& filesystem, std::string&& working_path,
@@ -288,6 +316,18 @@ class IntegerIndexStorage : public PersistentStorage {
Options&& options,
PostingListIntegerIndexSerializer* posting_list_serializer);
+ // Flushes data into posting list(s), creates a new bucket with range
+ // [key_lower, key_upper], and appends it to storage's sorted buckets.
+ // It is a helper function for TransferIndex.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR if it fails to write existing data into posting list(s)
+ // - Any FileBackedVector or PostingList errors
+ static libtextclassifier3::Status FlushDataIntoNewSortedBucket(
+ int64_t key_lower, int64_t key_upper,
+ std::vector<IntegerIndexData>&& data, IntegerIndexStorage* storage);
+
// Flushes contents of all storages to underlying files.
//
// Returns:
@@ -308,8 +348,9 @@ class IntegerIndexStorage : public PersistentStorage {
// - Crc of the Info on success
libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
- // Computes and returns all storages checksum. Checksums of bucket_storage_,
- // entry_storage_ and kv_storage_ will be combined together by XOR.
+ // Computes and returns the checksum of all storages. Checksums of
+ // sorted_buckets_ and unsorted_buckets_ will be combined together by XOR.
+ // TODO(b/259744228): implement and include flash_index_storage checksum
//
// Returns:
// - Crc of all storages on success
@@ -345,6 +386,16 @@ class IntegerIndexStorage : public PersistentStorage {
const std::vector<int64_t>::const_iterator& it_end,
FileBackedVector<Bucket>::MutableView& mutable_bucket);
+ // Merges all unsorted buckets into sorted buckets and clears unsorted
+ // buckets.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if sorted buckets length exceeds the limit after
+ // merging
+ // - Any FileBackedVector errors
+ libtextclassifier3::Status SortBuckets();
+
Crcs& crcs() override {
return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
kCrcsMetadataFileOffset);
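To make the TransferIndex contract above concrete, the call pattern exercised by the TransferIndex tests added to integer-index-storage_test.cc later in this patch looks roughly like the sketch below. The local names (filesystem, working_path, serializer, old_storage) and the mapping values are illustrative stand-ins; the key point is that document_id_old_to_new is indexed by old DocumentId, stores the new DocumentId, and uses kInvalidDocumentId for deleted documents, whose hits are dropped during the transfer.

// Sketch only; condensed from the TransferIndex tests added below.
ICING_ASSIGN_OR_RETURN(
    std::unique_ptr<IntegerIndexStorage> new_storage,
    IntegerIndexStorage::Create(filesystem, working_path + "_temp",
                                IntegerIndexStorage::Options(), serializer));
// Old doc 0 -> new doc 0, old doc 1 deleted, old doc 2 -> new doc 1.
std::vector<DocumentId> document_id_old_to_new = {0, kInvalidDocumentId, 1};
ICING_RETURN_IF_ERROR(
    old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
ICING_RETURN_IF_ERROR(new_storage->PersistToDisk());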
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
new file mode 100644
index 0000000..d150f2d
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -0,0 +1,270 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/numeric/number-generator.h"
+#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
+#include "icing/testing/tmp-directory.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/numeric:integer-index-storage_benchmark
+//
+// $ blaze-bin/icing/index/numeric/integer-index-storage_benchmark
+// --benchmark_filter=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/numeric:integer-index-storage_benchmark
+//
+// $ adb push
+// blaze-bin/icing/index/numeric/integer-index-storage_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/integer-index-storage_benchmark
+// --benchmark_filter=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::SizeIs;
+
+static constexpr SectionId kDefaultSectionId = 12;
+static constexpr int kDefaultSeed = 12345;
+
+enum DistributionTypeEnum {
+ kUniformDistribution,
+};
+
+class IntegerIndexStorageBenchmark {
+ public:
+ Filesystem filesystem;
+ std::string working_path;
+
+ PostingListIntegerIndexSerializer posting_list_serializer;
+
+ explicit IntegerIndexStorageBenchmark()
+ : working_path(GetTestTempDir() + "/integer_index_benchmark") {}
+
+ ~IntegerIndexStorageBenchmark() {
+ filesystem.DeleteDirectoryRecursively(working_path.c_str());
+ }
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<NumberGenerator<int64_t>>>
+CreateIntegerGenerator(DistributionTypeEnum distribution_type, int seed,
+ int num_keys) {
+ switch (distribution_type) {
+ case DistributionTypeEnum::kUniformDistribution:
+ // Since the collision # follows a Poisson distribution with lambda =
+ // (num_keys / range), we set the range to 10x num_keys (lambda = 0.1) to
+ // avoid too many collisions.
+ //
+ // Distribution:
+ // - keys in range picked 0 times: 90.5%
+ // - keys in range picked 1 time: 9%
+ // - keys in range picked 2 times: 0.45%
+ // - keys in range picked 3 times: 0.015%
+ //
+ // For example, num_keys = 1M, range = 10M. Then there will be ~904837 keys
+ // picked exactly once, 45242 keys picked twice, 1508 keys picked thrice ...
+ return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
+ seed, /*range_lower=*/0,
+ /*range_upper=*/static_cast<int64_t>(num_keys) * 10 - 1);
+ default:
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+}
+
+void BM_Index(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::vector<int64_t> keys(num_keys);
+ for (int i = 0; i < num_keys; ++i) {
+ keys[i] = generator->Generate();
+ }
+
+ IntegerIndexStorageBenchmark benchmark;
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(),
+ &benchmark.posting_list_serializer));
+ state.ResumeTiming();
+
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i),
+ kDefaultSectionId, {keys[i]}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ state.PauseTiming();
+ storage.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_Index)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+
+void BM_BatchIndex(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::vector<int64_t> keys(num_keys);
+ for (int i = 0; i < num_keys; ++i) {
+ keys[i] = generator->Generate();
+ }
+
+ IntegerIndexStorageBenchmark benchmark;
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(),
+ &benchmark.posting_list_serializer));
+ std::vector<int64_t> keys_copy(keys);
+ state.ResumeTiming();
+
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(0),
+ kDefaultSectionId, std::move(keys_copy)));
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ state.PauseTiming();
+ storage.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_BatchIndex)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+
+void BM_ExactQuery(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ IntegerIndexStorageBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(),
+ &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::unordered_map<int64_t, std::vector<DocumentId>> keys;
+ for (int i = 0; i < num_keys; ++i) {
+ int64_t key = generator->Generate();
+ keys[key].push_back(static_cast<DocumentId>(i));
+ ICING_ASSERT_OK(
+ storage->AddKeys(static_cast<DocumentId>(i), kDefaultSectionId, {key}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ for (auto _ : state) {
+ int64_t exact_query_key = generator->Generate();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ storage->GetIterator(/*query_key_lower=*/exact_query_key,
+ /*query_key_upper=*/exact_query_key));
+ int cnt = 0;
+ while (iterator->Advance().ok()) {
+ benchmark::DoNotOptimize(iterator->doc_hit_info());
+ ++cnt;
+ }
+
+ const auto it = keys.find(exact_query_key);
+ if (it == keys.end()) {
+ ASSERT_THAT(cnt, Eq(0));
+ } else {
+ ASSERT_THAT(it->second, SizeIs(cnt));
+ }
+ }
+}
+BENCHMARK(BM_ExactQuery)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
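The collision percentages quoted in CreateIntegerGenerator above follow directly from the Poisson model stated there. With lambda = num_keys / range = 0.1, the probability that a particular key in the range is drawn exactly k times is P(k) = e^(-lambda) * lambda^k / k!, which gives P(0) ~= 0.905, P(1) ~= 0.0905, P(2) ~= 0.00452 and P(3) ~= 0.00015. Scaling by range = 10M for the num_keys = 1M example reproduces the counts in the comment: roughly 904837 keys drawn exactly once, 45242 drawn twice and 1508 drawn three times.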
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
index 92fb912..9d6864c 100644
--- a/icing/index/numeric/integer-index-storage_test.cc
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -45,10 +45,12 @@ namespace {
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
+using ::testing::Le;
using ::testing::Ne;
using ::testing::Not;
@@ -243,7 +245,7 @@ TEST_F(IntegerIndexStorageTest, InitializeNewFiles) {
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
IntegerIndexStorage::kInfoMetadataFileOffset));
EXPECT_THAT(info.magic, Eq(Info::kMagic));
- EXPECT_THAT(info.num_keys, Eq(0));
+ EXPECT_THAT(info.num_data, Eq(0));
// Check crcs section
Crcs crcs;
@@ -331,6 +333,13 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
+ // Insert some data.
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
ICING_ASSERT_OK_AND_ASSIGN(
doc_hit_info_vec,
Query(storage.get(),
@@ -423,10 +432,12 @@ TEST_F(IntegerIndexStorageTest,
// Modify info, but don't update the checksum. This would be similar to
// corruption of info.
- info.num_keys += kCorruptedValueOffset;
+ info.num_data += kCorruptedValueOffset;
ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
IntegerIndexStorage::kInfoMetadataFileOffset,
&info, sizeof(Info)));
+ metadata_sfd.reset();
+
{
// Attempt to create the integer index storage with info that doesn't match
// its checksum and confirm that it fails.
@@ -580,6 +591,7 @@ TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
/*new_keys=*/{300}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Exact query on key in each sorted bucket should get the correct result.
@@ -635,6 +647,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
/*new_keys=*/{2000}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Exact query on key in each unsorted bucket should get the correct result.
@@ -689,6 +702,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
/*new_keys=*/{20}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(4));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Exact query on key with multiple hits should get the correct result.
@@ -756,6 +770,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
/*new_keys=*/{300}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Range query on each sorted bucket boundary should get the correct result.
@@ -811,6 +826,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
/*new_keys=*/{2000}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Range query on each unsorted bucket boundary should get the correct result.
@@ -856,6 +872,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
/*new_keys=*/{30}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(2));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Range query on partial range of each sorted bucket should get the correct
@@ -906,6 +923,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
/*new_keys=*/{-72}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(2));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Range query on partial range of each unsorted bucket should get the correct
@@ -983,6 +1001,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) {
EXPECT_THAT(storage->AddKeys(/*document_id=*/9, kDefaultSectionId,
/*new_keys=*/{2000}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
// Range query should get the correct result.
@@ -1036,9 +1055,6 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) {
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
- // Sorted buckets: [(-1000,-100), (0,100), (150,199), (200,300), (301,999)]
- // Unsorted buckets: [(1000,INT64_MAX), (-99,-1), (101,149),
- // (INT64_MIN,-1001)]
// Batch add the following keys (including some edge cases) to test the
// correctness of the sort and binary search logic in AddKeys().
// clang-format off
@@ -1051,6 +1067,7 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) {
EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
std::vector<int64_t>(keys)),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(keys.size()));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
for (int64_t key : keys) {
@@ -1060,6 +1077,19 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) {
}
}
+TEST_F(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
+ serializer_.get()));
+
+ std::vector<int64_t> keys = {2, 3, 1, 2, 4, -1, -1, 100, 3};
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, std::move(keys)),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(6));
+}
+
TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
// We use predefined custom buckets to initialize new integer index storage
// and create some test keys accordingly.
@@ -1084,6 +1114,7 @@ TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
{-500, 1024, -200, 208, std::numeric_limits<int64_t>::max(), -1000,
300, std::numeric_limits<int64_t>::min(), -1500, 2000}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
EXPECT_THAT(
@@ -1144,6 +1175,7 @@ TEST_F(IntegerIndexStorageTest,
EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/54,
/*new_keys=*/{2000}),
IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
std::vector<SectionId> expected_sections = {63, 62, 61, 60, 59,
58, 57, 56, 55, 54};
@@ -1154,6 +1186,397 @@ TEST_F(IntegerIndexStorageTest,
EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
}
+TEST_F(IntegerIndexStorageTest, TransferIndex) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/13, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/21, kDefaultSectionId,
+ /*new_keys=*/{2048}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/34, kDefaultSectionId,
+ /*new_keys=*/{156}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/55, kDefaultSectionId,
+ /*new_keys=*/{20}));
+ ASSERT_THAT(storage->num_data(), Eq(9));
+
+ // Delete doc id = 5, 34, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(56, kInvalidDocumentId);
+ document_id_old_to_new[1] = 8;
+ document_id_old_to_new[2] = 3;
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 6;
+ document_id_old_to_new[21] = 1;
+ document_id_old_to_new[55] = 4;
+
+ // Transfer to new storage.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
+ Options(), serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ ICING_ASSERT_OK(new_storage->PersistToDisk());
+ }
+
+ // Verify after transferring and reinitializing the instance.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
+ Options(), serializer_.get()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(7));
+
+ // -500 had hits for old_docids 1 and 13, which are now 6 and 8.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-500, /*key_upper=*/-500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections))));
+
+ // 1024 had a hit for old_docid 2, which is now 3.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ // -200 had a hit for old_docid 3, which is now 0.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-200, /*key_upper=*/-200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+
+ // -60 had hits for old_docids 5 and 8, which is now only 2 (because doc 5 has
+ // been deleted).
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-60, /*key_upper=*/-60),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+
+ // 2048 had a hit for old_docid 21, which is now 1.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/2048, /*key_upper=*/2048),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+
+ // 156 had a hit for old_docid 34, which is not found now (because doc 34 has
+ // been deleted).
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/156, /*key_upper=*/156),
+ IsOkAndHolds(IsEmpty()));
+
+ // 20 had a hit for old_docid 55, which is now 4.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/20, /*key_upper=*/20),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{120}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-2000}));
+ ASSERT_THAT(storage->num_data(), Eq(2));
+
+ // Create document_id_old_to_new with size = 2. TransferIndex should handle
+ // out of range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId, 0};
+
+ // Transfer to new storage.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
+ Options(), serializer_.get()));
+ EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+
+ // Verify after transferring.
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(1));
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/120, /*key_upper=*/120),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(
+ Query(new_storage.get(), /*key_lower=*/-2000, /*key_upper=*/-2000),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, TransferEmptyIndex) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+ ASSERT_THAT(storage->num_data(), Eq(0));
+
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId, 0, 1,
+ kInvalidDocumentId, 2};
+
+ // Transfer to new storage.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
+ Options(), serializer_.get()));
+ EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+
+ // Verify after transferring.
+ EXPECT_THAT(new_storage->num_data(), Eq(0));
+ EXPECT_THAT(Query(new_storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, TransferIndexDeleteAll) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/13, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ASSERT_THAT(storage->num_data(), Eq(6));
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+
+ // Transfer to new storage.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
+ Options(), serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ ICING_ASSERT_OK(new_storage->PersistToDisk());
+ }
+
+ // Verify after transferring and reinitializing the instance.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
+ Options(), serializer_.get()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(0));
+ EXPECT_THAT(Query(new_storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
+ // This test verifies that TransferIndex invokes the bucket merging logic to
+ // ensure we're able to avoid having mostly empty buckets after inserting
+ // and deleting data for many rounds.
+
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/6, kDefaultSectionId,
+ /*new_keys=*/{2048}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/7, kDefaultSectionId,
+ /*new_keys=*/{156}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{20}));
+ ASSERT_THAT(storage->num_data(), Eq(9));
+ ASSERT_THAT(storage->num_data(),
+ Le(IntegerIndexStorage::kNumDataThresholdForBucketMerge));
+
+ // Create document_id_old_to_new that keeps all existing documents.
+ std::vector<DocumentId> document_id_old_to_new(9);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Transfer to new storage. It should result in 1 bucket: [INT64_MIN,
+ // INT64_MAX] after transferring.
+ const std::string new_storage_working_path = working_path_ + "_temp";
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, new_storage_working_path,
+ Options(), serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ }
+
+ // Check new_storage->sorted_bucket_ manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ new_storage_working_path, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+}
+
+TEST_F(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
+ // This test verifies that TransferIndex invokes the bucket merging logic but
+ // doesn't merge buckets too aggressively, to ensure we won't get a bucket
+ // with too much data.
+
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Insert data into 2 buckets so that the total # of data in these 2 buckets
+ // exceeds kNumDataThresholdForBucketMerge.
+ // - Bucket 1: [-1000, -100]
+ // - Bucket 2: [101, 149]
+ DocumentId document_id = 0;
+ int num_data_for_bucket1 = 200;
+ for (int i = 0; i < num_data_for_bucket1; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ++document_id;
+ }
+
+ int num_data_for_bucket2 = 150;
+ for (int i = 0; i < num_data_for_bucket2; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
+ /*new_keys=*/{120}));
+ ++document_id;
+ }
+
+ ASSERT_THAT(num_data_for_bucket1 + num_data_for_bucket2,
+ Gt(IntegerIndexStorage::kNumDataThresholdForBucketMerge));
+
+ // Create document_id_old_to_new that keeps all existing documents.
+ std::vector<DocumentId> document_id_old_to_new(document_id);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Transfer to new storage. This should result in 2 buckets: [INT64_MIN, 100]
+ // and [101, INT64_MAX]
+ const std::string new_storage_working_path = working_path_ + "_temp";
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(filesystem_, new_storage_working_path,
+ Options(), serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ }
+
+ // Check new_storage->sorted_bucket_ manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ new_storage_working_path, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(2));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bk1->key_upper(), Eq(100));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk2, sorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(bk2->key_lower(), Eq(101));
+ EXPECT_THAT(bk2->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+}
+
} // namespace
} // namespace lib
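A quick sanity check on the two bucket-merge tests above, using kNumDataThresholdForBucketMerge = 240 from integer-index-storage.h: in TransferIndexShouldInvokeMergeBuckets the transferred storage holds only 9 entries, well under the threshold, so all adjacent buckets can be folded into the single bucket [INT64_MIN, INT64_MAX]; in TransferIndexExceedsMergeThreshold the two populated buckets hold 200 + 150 = 350 entries, and since 200 <= 240 but 200 + 150 > 240, the accumulated run must be flushed before the [101, 149] bucket is absorbed, which is consistent with the two expected buckets [INT64_MIN, 100] and [101, INT64_MAX].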
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
index 4de437e..a2d40f1 100644
--- a/icing/index/numeric/integer-index.cc
+++ b/icing/index/numeric/integer-index.cc
@@ -24,8 +24,10 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/store/document-id.h"
@@ -95,11 +97,38 @@ GetPropertyIntegerIndexStorageMap(
} // namespace
+libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
+ auto iter = integer_index_.property_to_storage_map_.find(property_path_);
+ IntegerIndexStorage* target_storage = nullptr;
+ if (iter != integer_index_.property_to_storage_map_.end()) {
+ target_storage = iter->second.get();
+ } else {
+ // A new property path. Create a new storage instance and insert into the
+ // map.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ integer_index_.filesystem_,
+ GetPropertyIndexStoragePath(integer_index_.working_path_,
+ property_path_),
+ IntegerIndexStorage::Options(),
+ integer_index_.posting_list_serializer_.get()));
+ target_storage = new_storage.get();
+ integer_index_.property_to_storage_map_.insert(
+ std::make_pair(property_path_, std::move(new_storage)));
+ }
+
+ return target_storage->AddKeys(document_id_, section_id_,
+ std::move(seen_keys_));
+}
+
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
IntegerIndex::Create(const Filesystem& filesystem, std::string working_path) {
if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
// Discard working_path if metadata file is missing, and reinitialize.
- ICING_RETURN_IF_ERROR(Discard(filesystem, working_path, kWorkingPathType));
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
return InitializeNewFiles(filesystem, std::move(working_path));
}
return InitializeExistingFiles(filesystem, std::move(working_path));
@@ -113,7 +142,74 @@ IntegerIndex::~IntegerIndex() {
}
}
-libtextclassifier3::Status IntegerIndex::Reset() {
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
+ int64_t key_upper) const {
+ auto iter = property_to_storage_map_.find(std::string(property_path));
+ if (iter == property_to_storage_map_.end()) {
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+ /*numeric_index_iter=*/nullptr);
+ }
+
+ return iter->second->GetIterator(key_lower, key_upper);
+}
+
+libtextclassifier3::Status IntegerIndex::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new integer index");
+ }
+
+ {
+ // Transfer all indexed data from the current integer index to the new
+ // integer index. Also PersistToDisk and destruct the instance after
+ // finishing, so we can safely swap directories later.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerIndex> new_integer_index,
+ Create(filesystem_, temp_working_path_ddir.dir()));
+ ICING_RETURN_IF_ERROR(
+ TransferIndex(document_id_old_to_new, new_integer_index.get()));
+ new_integer_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
+ }
+
+ // Destruct current storage instances to safely swap directories.
+ metadata_mmapped_file_.reset();
+ property_to_storage_map_.clear();
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new integer index due to failed swap");
+ }
+
+ // Reinitialize the integer index.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem_, GetMetadataFilePath(working_path_),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ metadata_mmapped_file_ =
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
+
+ // Initialize all existing integer index storages.
+ ICING_ASSIGN_OR_RETURN(
+ property_to_storage_map_,
+ GetPropertyIntegerIndexStorageMap(filesystem_, working_path_,
+ posting_list_serializer_.get()));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::Clear() {
// Step 1: clear property_to_storage_map_.
property_to_storage_map_.clear();
@@ -128,7 +224,7 @@ libtextclassifier3::Status IntegerIndex::Reset() {
GetPropertyIndexStoragePath(working_path_, property_path)));
}
- info()->last_added_document_id = kInvalidDocumentId;
+ info().last_added_document_id = kInvalidDocumentId;
return libtextclassifier3::Status::OK;
}
@@ -160,9 +256,9 @@ IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
/*property_to_storage_map=*/{}));
// Initialize info content by writing mapped memory directly.
- Info* info_ptr = new_integer_index->info();
- info_ptr->magic = Info::kMagic;
- info_ptr->last_added_document_id = kInvalidDocumentId;
+ Info& info_ref = new_integer_index->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.last_added_document_id = kInvalidDocumentId;
// Initialize new PersistentStorage. The initial checksums will be computed
// and set via InitializeNewStorage.
ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
@@ -200,13 +296,41 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
// Validate magic.
- if (integer_index->info()->magic != Info::kMagic) {
+ if (integer_index->info().magic != Info::kMagic) {
return absl_ports::FailedPreconditionError("Incorrect magic value");
}
return integer_index;
}
+libtextclassifier3::Status IntegerIndex::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const {
+ for (const auto& [property_path, old_storage] : property_to_storage_map_) {
+ std::string new_storage_working_path = GetPropertyIndexStoragePath(
+ new_integer_index->working_path_, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ new_integer_index->filesystem_, new_storage_working_path,
+ IntegerIndexStorage::Options(),
+ new_integer_index->posting_list_serializer_.get()));
+
+ ICING_RETURN_IF_ERROR(
+ old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
+
+ if (new_storage->num_data() == 0) {
+ new_storage.reset();
+ ICING_RETURN_IF_ERROR(
+ IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
+ } else {
+ new_integer_index->property_to_storage_map_.insert(
+ std::make_pair(property_path, std::move(new_storage)));
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
for (auto& [_, storage] : property_to_storage_map_) {
ICING_RETURN_IF_ERROR(storage->PersistToDisk());
@@ -222,7 +346,7 @@ libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk() {
}
libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum() {
- return info()->ComputeChecksum();
+ return info().ComputeChecksum();
}
libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum() {
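From a caller's perspective, the new Optimize entry point implemented above takes the same kind of old-to-new DocumentId mapping as the storage-level TransferIndex, plus the new last added document id. A minimal sketch follows; the integer_index variable and the mapping values are illustrative, and how the mapping is produced is outside this patch.

// Sketch only: old document ids index the vector, values are the new ids,
// and kInvalidDocumentId marks deleted documents.
std::vector<DocumentId> document_id_old_to_new = {0, kInvalidDocumentId, 1};
DocumentId new_last_added_document_id = 1;
ICING_RETURN_IF_ERROR(integer_index->Optimize(document_id_old_to_new,
                                              new_last_added_document_id));
// On success the index has been rebuilt: data was transferred into a "_temp"
// working directory, the directories were swapped, and the metadata plus all
// per-property storages were reinitialized. Property paths whose data was
// entirely deleted have had their underlying storages discarded.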
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index a00d339..98c26ef 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -90,25 +90,107 @@ class IntegerIndex : public NumericIndex<int64_t> {
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
const Filesystem& filesystem, std::string working_path);
+ // Deletes IntegerIndex under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
~IntegerIndex() override;
- // TODO(b/249829533): implement these functions and add comments.
+ // Returns an Editor instance for adding new records into integer index for a
+ // given property, DocumentId and SectionId. See Editor for more details.
std::unique_ptr<typename NumericIndex<int64_t>::Editor> Edit(
std::string_view property_path, DocumentId document_id,
- SectionId section_id) override;
+ SectionId section_id) override {
+ return std::make_unique<Editor>(property_path, document_id, section_id,
+ *this);
+ }
+ // Returns a DocHitInfoIterator for iterating through all docs which have the
+ // specified (integer) property contents in range [query_key_lower,
+ // query_key_upper].
+ // When iterating through all relevant doc hits, it:
+ // - Merges multiple SectionIds of doc hits with the same DocumentId into a
+ // single SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator instance
+ // - NOT_FOUND_ERROR if the given property_path doesn't exist
+ // - Any IntegerIndexStorage errors
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
std::string_view property_path, int64_t key_lower,
int64_t key_upper) const override;
- // Clears all integer index data.
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. The integer index will convert all data (hits) to the new
+ // document ids and regenerate all index files. If all data in a property
+ // path are completely deleted, then the underlying storage will be discarded
+ // as well.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the integer index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on IO error
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ // Clears all integer index data by discarding all existing storages, and sets
+ // last_added_document_id to kInvalidDocumentId.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status Reset() override;
+ libtextclassifier3::Status Clear() override;
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
private:
+ class Editor : public NumericIndex<int64_t>::Editor {
+ public:
+ explicit Editor(std::string_view property_path, DocumentId document_id,
+ SectionId section_id, IntegerIndex& integer_index)
+ : NumericIndex<int64_t>::Editor(property_path, document_id, section_id),
+ integer_index_(integer_index) {}
+
+ ~Editor() override = default;
+
+ libtextclassifier3::Status BufferKey(int64_t key) override {
+ seen_keys_.push_back(key);
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
+
+ private:
+ // Vector for caching all seen keys. Since IntegerIndexStorage::AddKeys
+ // sorts and dedupes keys, we can simply use a vector here and move it to
+ // AddKeys().
+ std::vector<int64_t> seen_keys_;
+
+ IntegerIndex& integer_index_; // Does not own.
+ };
+
explicit IntegerIndex(const Filesystem& filesystem,
std::string&& working_path,
std::unique_ptr<PostingListIntegerIndexSerializer>
@@ -128,6 +210,18 @@ class IntegerIndex : public NumericIndex<int64_t> {
InitializeExistingFiles(const Filesystem& filesystem,
std::string&& working_path);
+ // Transfers integer index data from the current integer index to
+ // new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const;
+
// Flushes contents of all storages to underlying files.
//
// Returns:
@@ -148,8 +242,9 @@ class IntegerIndex : public NumericIndex<int64_t> {
// - Crc of the Info on success
libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
- // Computes and returns all storages checksum. Checksums of bucket_storage_,
- // entry_storage_ and kv_storage_ will be combined together by XOR.
+ // Computes and returns the checksum of all storages. Checksums of
+ // (storage_crc, property_path) for all existing property paths will be
+ // combined together by XOR.
//
// Returns:
// - Crc of all storages on success
@@ -166,14 +261,14 @@ class IntegerIndex : public NumericIndex<int64_t> {
kCrcsMetadataFileOffset);
}
- Info* info() {
- return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
- kInfoMetadataFileOffset);
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
}
- const Info* info() const {
- return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
- kInfoMetadataFileOffset);
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
}
std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
new file mode 100644
index 0000000..c6cf855
--- /dev/null
+++ b/icing/index/numeric/integer-index_test.cc
@@ -0,0 +1,1189 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index.h"
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Lt;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = IntegerIndex::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+constexpr static std::string_view kDefaultTestPropertyPath = "test.property";
+
+constexpr SectionId kDefaultSectionId = 0;
+
+template <typename T>
+class NumericIndexIntegerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/numeric_index_integer_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ template <typename UnknownIntegerIndexType>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex() {
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex<DummyNumericIndex<int64_t>>() {
+ return DummyNumericIndex<int64_t>::Create(filesystem_, working_path_);
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex<IntegerIndex>() {
+ return IntegerIndex::Create(filesystem_, working_path_);
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
+void Index(NumericIndex<int64_t>* integer_index, std::string_view property_path,
+ DocumentId document_id, SectionId section_id,
+ std::vector<int64_t> keys) {
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ integer_index->Edit(property_path, document_id, section_id);
+
+ for (const auto& key : keys) {
+ ICING_EXPECT_OK(editor->BufferKey(key));
+ }
+ ICING_EXPECT_OK(std::move(*editor).IndexAllBufferedKeys());
+}
+
+libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+ const NumericIndex<int64_t>* integer_index, std::string_view property_path,
+ int64_t key_lower, int64_t key_upper) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(property_path, key_lower, key_upper));
+
+ std::vector<DocHitInfo> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->doc_hit_info());
+ }
+ return result;
+}
+
+using TestTypes = ::testing::Types<DummyNumericIndex<int64_t>, IntegerIndex>;
+TYPED_TEST_SUITE(NumericIndexIntegerTest, TestTypes);
+
+TYPED_TEST(NumericIndexIntegerTest, SetLastAddedDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ constexpr DocumentId kDocumentId = 100;
+ integer_index->set_last_added_document_id(kDocumentId);
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 123;
+ integer_index->set_last_added_document_id(kNextDocumentId);
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TYPED_TEST(
+ NumericIndexIntegerTest,
+ SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ constexpr DocumentId kDocumentId = 123;
+ integer_index->set_last_added_document_id(kDocumentId);
+ ASSERT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 100;
+ ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+ integer_index->set_last_added_document_id(kNextDocumentId);
+ // last_added_document_id() should remain unchanged.
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, SingleKeyExactQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ int64_t query_key = 2;
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/query_key, /*key_upper=*/query_key),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, SingleKeyRangeQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, EmptyResult) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/10, /*key_upper=*/10),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ NonExistingPropertyPathShouldReturnEmptyResult) {
+ constexpr std::string_view kAnotherPropertyPath = "another_property";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+
+ EXPECT_THAT(Query(integer_index.get(), kAnotherPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ MultipleKeysShouldMergeAndDedupeDocHitInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Construct several documents with multiple keys under the same section.
+ // Range query [1, 3] will find hits with the same (DocumentId, SectionId)
+ // multiple times. For example, (2, kDefaultSectionId) will be found twice
+ // (once for key = 1 and once for key = 3).
+ // Test if the iterator dedupes correctly.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{-1000, 0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{-100, 0, 1, 2, 3, 4, 5});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3, 1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{4, 1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{1, 6});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2, 100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/6,
+ kDefaultSectionId, /*keys=*/{1000, 2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/7,
+ kDefaultSectionId, /*keys=*/{4, -1000});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, EdgeNumericValues) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{-100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{-80});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::max()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::min()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{200});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/6,
+ kDefaultSectionId, /*keys=*/{100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/7,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::max()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/9,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::min()});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+
+ // Negative key
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/-100, /*key_upper=*/-70),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+
+ // INT64_MAX key
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ // INT64_MIN key
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+
+ // Key = 0
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+
+ // All keys from INT64_MIN to INT64_MAX
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Construct several documents with multiple numeric sections.
+ // Range query [1, 3] will find hits with the same DocumentId but multiple
+ // different SectionIds. For example, there will be 2 hits (1, 1), (1, 2) for
+ // DocumentId=1.
+ // Test if the iterator merges multiple sections into a single SectionIdMask
+ // correctly.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/2, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/1, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/0, /*keys=*/{-1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/1, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/0, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/5, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/4, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/3, /*keys=*/{5});
+
+ EXPECT_THAT(
+ Query(integer_index.get(), kDefaultTestPropertyPath, /*key_lower=*/1,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, std::vector<SectionId>{4, 5}),
+ EqualsDocHitInfo(/*document_id=*/1, std::vector<SectionId>{1, 2}),
+ EqualsDocHitInfo(/*document_id=*/0, std::vector<SectionId>{1}))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, NonRelevantPropertyShouldNotBeIncluded) {
+ constexpr std::string_view kNonRelevantProperty = "non_relevant_property";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kNonRelevantProperty, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kNonRelevantProperty, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ RangeQueryKeyLowerGreaterThanKeyUpperShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, Optimize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/13,
+ kDefaultSectionId, /*keys=*/{2});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
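+ // Old document ids 1, 2, 8, 13 are remapped to new ids 0, 1, 2, 3; ids left
+ // as kInvalidDocumentId are treated as deleted.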
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // Verify index and query API still work normally after Optimize().
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/4, /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{123});
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/123, /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeMultiplePropertyPaths) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Doc id = 1: insert one key each for "prop1" and "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert one key for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert one key for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Doc id = 5: insert one key each for "prop1" and "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/5, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/5, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 8: insert one key for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/8, kSectionId2,
+ /*keys=*/{3});
+
+ // Doc id = 13: insert one key for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/13, kSectionId1,
+ /*keys=*/{4});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // Verify index and query API still work normally after Optimize().
+ // Key = 1
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
+
+ // Key = 2
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(IsEmpty()));
+
+ // Key = 3
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
+
+ // Key = 4
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeShouldDiscardEmptyPropertyStorage) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Doc id = 1: insert one key each for "prop1" and "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert one key for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert one key for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Delete doc id = 1, 3, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(4, kInvalidDocumentId);
+ document_id_old_to_new[2] = 0;
+
+ DocumentId new_last_added_document_id = 0;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // All data in "prop2", as well as the underlying storage, should be deleted,
+ // so querying "prop2" should return an empty result.
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
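+ // For the persistent IntegerIndex implementation, the per-property storage
+ // directory for "prop2" should have been removed from disk as well.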
+ if (std::is_same_v<IntegerIndex, TypeParam>) {
+ std::string prop2_storage_working_path =
+ absl_ports::StrCat(this->working_path_, "/", kPropertyPath2);
+ EXPECT_THAT(
+ this->filesystem_.DirectoryExists(prop2_storage_working_path.c_str()),
+ IsFalse());
+ }
+
+ // Verify we can still index and query for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
+ /*keys=*/{123});
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/123, /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeOutOfRangeDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+
+ // Create document_id_old_to_new with size = 2. Optimize should handle
+ // out-of-range DocumentIds properly.
+ std::vector<DocumentId> document_id_old_to_new(2, kInvalidDocumentId);
+
+ EXPECT_THAT(integer_index->Optimize(
+ document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeDeleteAll) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/13,
+ kDefaultSectionId, /*keys=*/{2});
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+
+ EXPECT_THAT(integer_index->Optimize(
+ document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, Clear) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), /*property_path=*/"B", /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ integer_index->set_last_added_document_id(1);
+
+ ASSERT_THAT(integer_index->last_added_document_id(), Eq(1));
+ ASSERT_THAT(
+ Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kDefaultSectionId}))));
+ ASSERT_THAT(
+ Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kDefaultSectionId}))));
+
+ // After Clear(), last_added_document_id should be reset to
+ // kInvalidDocumentId, and the previously added keys should be deleted.
+ ICING_ASSERT_OK(integer_index->Clear());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(IsEmpty()));
+
+ // Integer index should be able to work normally after Clear().
+ Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{123});
+ Index(integer_index.get(), /*property_path=*/"B", /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{456});
+ integer_index->set_last_added_document_id(4);
+
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(4));
+ EXPECT_THAT(
+ Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/123,
+ /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kDefaultSectionId}))));
+ EXPECT_THAT(
+ Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/456,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/4, std::vector<SectionId>{kDefaultSectionId}))));
+}
+
+// Tests for persistent integer index only
+class IntegerIndexTest : public NumericIndexIntegerTest<IntegerIndex> {};
+
+TEST_F(IntegerIndexTest, InvalidWorkingPath) {
+ EXPECT_THAT(IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test"),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(IntegerIndexTest, InitializeNewFiles) {
+ {
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ // Check info section
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndex::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
+ EXPECT_THAT(info.last_added_document_id, Eq(kInvalidDocumentId));
+
+ // Check crcs section
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndex::kCrcsMetadataFileOffset));
+ // There are no storages initially, so storages_crc should be 0.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Eq(0));
+ EXPECT_THAT(crcs.component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs.all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs.component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_F(IntegerIndexTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(IntegerIndex::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index1,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Insert some data.
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+ integer_index1->set_last_added_document_id(2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> doc_hit_info_vec,
+ Query(integer_index1.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(integer_index1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index2,
+ IntegerIndex::Create(filesystem_, working_path_));
+ EXPECT_THAT(integer_index2->last_added_document_id(), Eq(2));
+ EXPECT_THAT(Query(integer_index2.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+}
+
+TEST_F(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) {
+ std::vector<DocHitInfo> doc_hit_info_vec;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+ integer_index->set_last_added_document_id(2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_vec,
+ Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ }
+
+ {
+ // The previous instance went out of scope and was destructed. Although we
+ // didn't call PersistToDisk explicitly, the destructor should invoke it, so
+ // initializing another instance on the same files should succeed and we
+ // should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+ }
+}
+
+TEST_F(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndex::kCrcsMetadataFileOffset));
+
+ // Manually corrupt all_crc
+ crcs.all_crc += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index with metadata containing corrupted
+ // all_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or = IntegerIndex::Create(filesystem_, working_path_);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid all crc"));
+ }
+}
+
+TEST_F(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndex::kInfoMetadataFileOffset));
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ info.last_added_document_id += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndex::kInfoMetadataFileOffset, &info,
+ sizeof(Info)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index with info that doesn't match its
+ // checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or = IntegerIndex::Create(filesystem_, working_path_);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid info crc"));
+ }
+}
+
+TEST_F(IntegerIndexTest,
+ InitializeExistingFilesWithCorruptedStoragesShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ {
+ // Corrupt integer index storage for kDefaultTestPropertyPath manually.
+ PostingListIntegerIndexSerializer posting_list_integer_index_serializer;
+ std::string storage_working_path =
+ absl_ports::StrCat(working_path_, "/", kDefaultTestPropertyPath);
+ ASSERT_TRUE(filesystem_.DirectoryExists(storage_working_path.c_str()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, std::move(storage_working_path),
+ IntegerIndexStorage::Options(),
+ &posting_list_integer_index_serializer));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, /*section_id=*/4,
+ /*new_keys=*/{3, 4, 5}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the integer index with corrupted storages. This should
+ // fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or = IntegerIndex::Create(filesystem_, working_path_);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_F(IntegerIndexTest,
+ IntegerIndexShouldWorkAfterOptimizeAndReinitialization) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Doc id = 1: insert one key each for "prop1" and "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert one key for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert one key for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Doc id = 5: insert one key each for "prop1" and "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/5, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/5, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 8: insert one key for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/8, kSectionId2,
+ /*keys=*/{3});
+
+ // Doc id = 13: insert one key for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/13, kSectionId1,
+ /*keys=*/{4});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+ }
+
+ {
+ // Reinitialize IntegerIndex and verify index and query API still work
+ // normally.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_));
+
+ // Key = 1
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
+
+ // Key = 2
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(IsEmpty()));
+
+ // Key = 3
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
+
+ // Key = 4
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(IsEmpty()));
+
+ // Index new data.
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
+ /*keys=*/{123});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/100, kSectionId1,
+ /*keys=*/{456});
+ EXPECT_THAT(
+ Query(integer_index.get(), kPropertyPath2, /*key_lower=*/123,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ Query(integer_index.get(), kPropertyPath1, /*key_lower=*/123,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId1}))));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
index a9d65d4..347260a 100644
--- a/icing/index/numeric/numeric-index.h
+++ b/icing/index/numeric/numeric-index.h
@@ -67,7 +67,7 @@ class NumericIndex : public PersistentStorage {
// Returns:
// - OK on success
// - Any other errors, depending on the actual implementation
- virtual libtextclassifier3::Status IndexAllBufferedKeys() = 0;
+ virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0;
protected:
std::string property_path_;
@@ -129,9 +129,38 @@ class NumericIndex : public PersistentStorage {
GetIterator(std::string_view property_path, T key_lower,
T key_upper) const = 0;
- // Clears all files created by the index. Returns OK if all files were
- // cleared.
- virtual libtextclassifier3::Status Reset() = 0;
+ // Reduces internal file sizes by reclaiming the space and ids of deleted
+ // documents. The numeric index will convert all data (hits) to the new
+ // document ids and regenerate all index files. If all data in a property
+ // path are completely deleted, then the underlying storage must be discarded
+ // as well.
+ //
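+ // For example, with document_id_old_to_new = {kInvalidDocumentId, 0, 1},
+ // hits for old document id 1 are remapped to new id 0, hits for old id 2 are
+ // remapped to new id 1, and hits for old id 0 are discarded.
+ //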
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the numeric index.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) = 0;
+
+ // Clears all data in the numeric index and sets last_added_document_id to
+ // kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status Clear() = 0;
+
+ // Returns the largest document_id added to the index. Note that DocumentIds
+ // are always inserted in increasing order.
+ virtual DocumentId last_added_document_id() const = 0;
+
+ // Sets last_added_document_id to document_id so long as document_id >
+ // last_added_document_id() or last_added_document_id() is invalid.
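+ // For example, calling set_last_added_document_id(5) and then
+ // set_last_added_document_id(3) leaves last_added_document_id() at 5.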
+ virtual void set_last_added_document_id(DocumentId document_id) = 0;
protected:
explicit NumericIndex(const Filesystem& filesystem,
diff --git a/icing/index/numeric/numeric-index_test.cc b/icing/index/numeric/numeric-index_test.cc
deleted file mode 100644
index d4ff963..0000000
--- a/icing/index/numeric/numeric-index_test.cc
+++ /dev/null
@@ -1,380 +0,0 @@
-// Copyright (C) 2022 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/numeric/numeric-index.h"
-
-#include <limits>
-#include <string>
-#include <string_view>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "icing/file/filesystem.h"
-#include "icing/index/hit/doc-hit-info.h"
-#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/testing/tmp-directory.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-
-using ::testing::ElementsAre;
-using ::testing::IsEmpty;
-using ::testing::IsTrue;
-using ::testing::NotNull;
-
-constexpr static std::string_view kDefaultTestPropertyName = "test";
-
-constexpr SectionId kDefaultSectionId = 0;
-
-template <typename T>
-class NumericIndexTest : public ::testing::Test {
- protected:
- using INDEX_IMPL_TYPE = T;
-
- void SetUp() override {
- base_dir_ = GetTestTempDir() + "/icing";
- ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
- IsTrue());
-
- working_path_ = base_dir_ + "/numeric_index_integer_test";
-
- if (std::is_same_v<
- INDEX_IMPL_TYPE,
- DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>>) {
- ICING_ASSERT_OK_AND_ASSIGN(
- numeric_index_,
- DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>::Create(
- filesystem_, working_path_));
- }
-
- ASSERT_THAT(numeric_index_, NotNull());
- }
-
- void TearDown() override {
- numeric_index_.reset();
- filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
- }
-
- void Index(std::string_view property_name, DocumentId document_id,
- SectionId section_id,
- std::vector<typename INDEX_IMPL_TYPE::value_type> keys) {
- std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
- this->numeric_index_->Edit(property_name, document_id, section_id);
-
- for (const auto& key : keys) {
- ICING_EXPECT_OK(editor->BufferKey(key));
- }
- ICING_EXPECT_OK(editor->IndexAllBufferedKeys());
- }
-
- libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
- std::string_view property_name,
- typename INDEX_IMPL_TYPE::value_type key_lower,
- typename INDEX_IMPL_TYPE::value_type key_upper) {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<DocHitInfoIterator> iter,
- this->numeric_index_->GetIterator(property_name, key_lower, key_upper));
-
- std::vector<DocHitInfo> result;
- while (iter->Advance().ok()) {
- result.push_back(iter->doc_hit_info());
- }
- return result;
- }
-
- Filesystem filesystem_;
- std::string base_dir_;
- std::string working_path_;
- std::unique_ptr<NumericIndex<typename INDEX_IMPL_TYPE::value_type>>
- numeric_index_;
-};
-
-using TestTypes = ::testing::Types<DummyNumericIndex<int64_t>>;
-TYPED_TEST_SUITE(NumericIndexTest, TestTypes);
-
-TYPED_TEST(NumericIndexTest, SingleKeyExactQuery) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- int64_t query_key = 2;
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/query_key,
- /*key_upper=*/query_key),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest, SingleKeyRangeQuery) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest, EmptyResult) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/100,
- /*key_upper=*/200),
- IsOkAndHolds(IsEmpty()));
-}
-
-TYPED_TEST(NumericIndexTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
- // Construct several documents with mutiple keys under the same section.
- // Range query [1, 3] will find hits with same (DocumentId, SectionId) for
- // mutiple times. For example, (2, kDefaultSectionId) will be found twice
- // (once for key = 1 and once for key = 3).
- // Test if the iterator dedupes correctly.
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{-1000, 0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{-100, 0, 1, 2, 3, 4, 5});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{3, 1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{4, 1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{1, 6});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2, 100});
- this->Index(kDefaultTestPropertyName, /*document_id=*/6, kDefaultSectionId,
- /*keys=*/{1000, 2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/7, kDefaultSectionId,
- /*keys=*/{4, -1000});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/6, expected_sections),
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/4, expected_sections),
- EqualsDocHitInfo(/*document_id=*/3, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest, EdgeNumericValues) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{-100});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{-80});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::max()});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::min()});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{200});
- this->Index(kDefaultTestPropertyName, /*document_id=*/6, kDefaultSectionId,
- /*keys=*/{100});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/7, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::max()});
- this->Index(kDefaultTestPropertyName, /*document_id=*/8, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(
- kDefaultTestPropertyName, /*document_id=*/9, kDefaultSectionId,
- /*keys=*/{std::numeric_limits<typename TypeParam::value_type>::min()});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
-
- // Negative key
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/-100,
- /*key_upper=*/-70),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
-
- // value_type max key
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/
- std::numeric_limits<typename TypeParam::value_type>::max(),
- /*key_upper=*/
- std::numeric_limits<typename TypeParam::value_type>::max()),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/7, expected_sections),
- EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
-
- // value_type min key
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/
- std::numeric_limits<typename TypeParam::value_type>::min(),
- /*key_upper=*/
- std::numeric_limits<typename TypeParam::value_type>::min()),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
- EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
-
- // Key = 0
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/0, /*key_upper=*/0),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/8, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-
- // All keys from value_type min to value_type max
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/
- std::numeric_limits<typename TypeParam::value_type>::min(),
- /*key_upper=*/
- std::numeric_limits<typename TypeParam::value_type>::max()),
- IsOkAndHolds(
- ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
- EqualsDocHitInfo(/*document_id=*/8, expected_sections),
- EqualsDocHitInfo(/*document_id=*/7, expected_sections),
- EqualsDocHitInfo(/*document_id=*/6, expected_sections),
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/4, expected_sections),
- EqualsDocHitInfo(/*document_id=*/3, expected_sections),
- EqualsDocHitInfo(/*document_id=*/2, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest,
- MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
- // Construct several documents with mutiple numeric sections.
- // Range query [1, 3] will find hits with same DocumentIds but multiple
- // different SectionIds. For example, there will be 2 hits (1, 0), (1, 1) for
- // DocumentId=1.
- // Test if the iterator merges multiple sections into a single SectionIdMask
- // correctly.
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, /*section_id=*/0,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, /*section_id=*/1,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, /*section_id=*/2,
- /*keys=*/{-1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, /*section_id=*/0,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, /*section_id=*/1,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, /*section_id=*/2,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, /*section_id=*/3,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, /*section_id=*/4,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, /*section_id=*/5,
- /*keys=*/{5});
-
- EXPECT_THAT(
- this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/2, std::vector<SectionId>{3, 4}),
- EqualsDocHitInfo(/*document_id=*/1, std::vector<SectionId>{0, 1}),
- EqualsDocHitInfo(/*document_id=*/0, std::vector<SectionId>{1}))));
-}
-
-TYPED_TEST(NumericIndexTest, NonRelevantPropertyShouldNotBeIncluded) {
- constexpr std::string_view kNonRelevantProperty = "non_relevant_property";
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kNonRelevantProperty, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kNonRelevantProperty, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- std::vector<SectionId> expected_sections{kDefaultSectionId};
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/1,
- /*key_upper=*/3),
- IsOkAndHolds(ElementsAre(
- EqualsDocHitInfo(/*document_id=*/5, expected_sections),
- EqualsDocHitInfo(/*document_id=*/1, expected_sections),
- EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
-}
-
-TYPED_TEST(NumericIndexTest,
- RangeQueryKeyLowerGreaterThanKeyUpperShouldReturnError) {
- this->Index(kDefaultTestPropertyName, /*document_id=*/0, kDefaultSectionId,
- /*keys=*/{1});
- this->Index(kDefaultTestPropertyName, /*document_id=*/1, kDefaultSectionId,
- /*keys=*/{3});
- this->Index(kDefaultTestPropertyName, /*document_id=*/2, kDefaultSectionId,
- /*keys=*/{2});
- this->Index(kDefaultTestPropertyName, /*document_id=*/3, kDefaultSectionId,
- /*keys=*/{0});
- this->Index(kDefaultTestPropertyName, /*document_id=*/4, kDefaultSectionId,
- /*keys=*/{4});
- this->Index(kDefaultTestPropertyName, /*document_id=*/5, kDefaultSectionId,
- /*keys=*/{2});
-
- EXPECT_THAT(this->Query(kDefaultTestPropertyName, /*key_lower=*/3,
- /*key_upper=*/1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-} // namespace
-
-} // namespace lib
-} // namespace icing
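// The removed tests above pin down the numeric index query contract:
// Query(property, key_lower, key_upper) matches keys in the closed range
// [key_lower, key_upper], rejects key_lower > key_upper with INVALID_ARGUMENT,
// and merges hits from multiple sections of the same document into a single
// result. A minimal self-contained sketch of that contract (toy types and
// names below, not the icing IntegerIndex implementation):
#include <cstdint>
#include <map>
#include <optional>
#include <set>
#include <vector>

struct ToyHit {
  int32_t document_id;
  std::set<int32_t> section_ids;  // every section whose key matched
};

class ToyNumericIndex {
 public:
  void Index(int32_t document_id, int32_t section_id, int64_t key) {
    entries_.push_back({document_id, section_id, key});
  }

  // Returns std::nullopt to stand in for INVALID_ARGUMENT.
  std::optional<std::vector<ToyHit>> Query(int64_t key_lower,
                                           int64_t key_upper) const {
    if (key_lower > key_upper) return std::nullopt;
    std::map<int32_t, std::set<int32_t>> merged;  // document_id -> sections
    for (const Entry& e : entries_) {
      if (e.key >= key_lower && e.key <= key_upper) {
        merged[e.document_id].insert(e.section_id);  // dedupe per document
      }
    }
    std::vector<ToyHit> hits;
    for (auto it = merged.rbegin(); it != merged.rend(); ++it) {
      hits.push_back({it->first, it->second});  // descending DocumentId order
    }
    return hits;
  }

 private:
  struct Entry {
    int32_t document_id;
    int32_t section_id;
    int64_t key;
  };
  std::vector<Entry> entries_;
};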
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.cc b/icing/index/numeric/posting-list-integer-index-accessor.cc
index 271f8dc..220b240 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor.cc
@@ -38,16 +38,12 @@ PostingListIntegerIndexAccessor::Create(
FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer) {
uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
storage->block_size(), serializer->GetDataTypeBytes());
- std::unique_ptr<uint8_t[]> posting_list_buffer_array =
- std::make_unique<uint8_t[]>(max_posting_list_bytes);
- ICING_ASSIGN_OR_RETURN(
- PostingListUsed posting_list_buffer,
- PostingListUsed::CreateFromUnitializedRegion(
- serializer, posting_list_buffer_array.get(), max_posting_list_bytes));
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
return std::unique_ptr<PostingListIntegerIndexAccessor>(
new PostingListIntegerIndexAccessor(
- storage, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer), serializer));
+ storage, std::move(in_memory_posting_list), serializer));
}
/* static */ libtextclassifier3::StatusOr<
@@ -79,20 +75,23 @@ PostingListIntegerIndexAccessor::GetNextDataBatch() {
ICING_ASSIGN_OR_RETURN(
std::vector<IntegerIndexData> batch,
serializer_->GetData(&preexisting_posting_list_->posting_list));
- uint32_t next_block_index;
+ uint32_t next_block_index = kInvalidBlockIndex;
// Posting lists will only be chained when they are max-sized, in which case
- // block.next_block_index() will point to the next block for the next posting
- // list. Otherwise, block.next_block_index() can be kInvalidBlockIndex or be
- // used to point to the next free list block, which is not relevant here.
- if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
- next_block_index = preexisting_posting_list_->block.next_block_index();
- } else {
- next_block_index = kInvalidBlockIndex;
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
}
+
if (next_block_index != kInvalidBlockIndex) {
+ // Since we only have to deal with the next block of a max-sized posting
+ // list block, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
PostingListIdentifier next_posting_list_id(
next_block_index, /*posting_list_index=*/0,
- preexisting_posting_list_->block.posting_list_index_bits());
+ /*posting_list_index_bits=*/BitsToStore(1));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
storage_->GetPostingList(next_posting_list_id));
preexisting_posting_list_ =
@@ -108,7 +107,7 @@ libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
const IntegerIndexData& data) {
PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
? preexisting_posting_list_->posting_list
- : posting_list_buffer_;
+ : in_memory_posting_list_;
libtextclassifier3::Status status =
serializer_->PrependData(&active_pl, data);
if (!absl_ports::IsResourceExhausted(status)) {
@@ -118,16 +117,16 @@ libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
// we need to either move those data to a larger posting list or flush this
// posting list and create another max-sized posting list in the chain.
if (preexisting_posting_list_ != nullptr) {
- FlushPreexistingPostingList();
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
} else {
ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
}
- // Re-add data. Should always fit since we just cleared posting_list_buffer_.
- // It's fine to explicitly reference posting_list_buffer_ here because there's
- // no way of reaching this line while preexisting_posting_list_ is still in
- // use.
- return serializer_->PrependData(&posting_list_buffer_, data);
+ // Re-add data. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependData(&in_memory_posting_list_, data);
}
} // namespace lib
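// A small sketch of the chaining rule the hunk above switches to: only a
// max-sized posting list (one that fills its whole block) can be chained, and
// such a block holds a single posting list, so the chained posting list index
// is 0 and its index width is BitsToStore(1). The types below are toys; only
// the rule itself comes from the code above.
#include <cstdint>

constexpr uint32_t kToyInvalidBlockIndex = 0;

struct ToyPostingListInfo {
  uint32_t size_in_bytes;           // bytes occupied by this posting list
  uint32_t max_posting_list_bytes;  // largest posting list the storage allows
  uint32_t next_block_index;        // raw value stored in the block header
};

// Returns the block index of the next posting list in the chain, or
// kToyInvalidBlockIndex when the posting list is not max-sized (and therefore
// cannot be part of a chain).
inline uint32_t NextChainedBlockIndex(const ToyPostingListInfo& info) {
  if (info.size_in_bytes == info.max_posting_list_bytes) {
    return info.next_block_index;
  }
  return kToyInvalidBlockIndex;
}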
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.h b/icing/index/numeric/posting-list-integer-index-accessor.h
index 64a8901..4c1eced 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor.h
+++ b/icing/index/numeric/posting-list-integer-index-accessor.h
@@ -91,12 +91,9 @@ class PostingListIntegerIndexAccessor : public PostingListAccessor {
private:
explicit PostingListIntegerIndexAccessor(
- FlashIndexStorage* storage,
- std::unique_ptr<uint8_t[]> posting_list_buffer_array,
- PostingListUsed posting_list_buffer,
+ FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
PostingListIntegerIndexSerializer* serializer)
- : PostingListAccessor(storage, std::move(posting_list_buffer_array),
- std::move(posting_list_buffer)),
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
serializer_(serializer) {}
PostingListIntegerIndexSerializer* serializer_; // Does not own.
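// With this change, callers of PostingListUsed::CreateFromUnitializedRegion no
// longer allocate and thread through a raw uint8_t[] region; the factory takes
// only the serializer and a byte size, and the resulting PostingListUsed owns
// its in-memory region. A sketch of the new call pattern, assuming the same
// headers and test macros as the updated tests below:
PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
ICING_ASSERT_OK_AND_ASSIGN(
    PostingListUsed pl_used,
    PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// The backing bytes now live inside pl_used itself; prepend data as before.
ICING_ASSERT_OK(serializer.PrependData(
    &pl_used,
    IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2)));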
diff --git a/icing/index/numeric/posting-list-integer-index-accessor_test.cc b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
index f5fd693..48221b9 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
@@ -119,7 +119,7 @@ TEST_F(PostingListIntegerIndexAccessorTest, DataAddAndRetrieveProperly) {
EXPECT_THAT(
serializer_->GetData(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
- EXPECT_THAT(pl_holder.block.next_block_index(), Eq(kInvalidBlockIndex));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
}
TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLKeepOnSameBlock) {
@@ -254,7 +254,7 @@ TEST_F(PostingListIntegerIndexAccessorTest, MultiBlockChainsBlocksProperly) {
ElementsAreArray(data_vec.rbegin(), first_block_data_start));
// Now retrieve all of the data that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
@@ -328,7 +328,7 @@ TEST_F(PostingListIntegerIndexAccessorTest,
ElementsAreArray(all_data_vec.rbegin(), first_block_data_start));
// Now retrieve all of the data that were on the first block.
- uint32_t first_block_id = pl_holder.block.next_block_index();
+ uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
diff --git a/icing/index/numeric/posting-list-integer-index-serializer_test.cc b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
index d3d54ef..bfb4e71 100644
--- a/icing/index/numeric/posting-list-integer-index-serializer_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
@@ -43,11 +43,9 @@ TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitNotNull) {
PostingListIntegerIndexSerializer serializer;
int size = 2551 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
ASSERT_THAT(serializer.PrependData(
&pl_used, IntegerIndexData(/*section_id=*/0,
@@ -69,11 +67,9 @@ TEST(PostingListIntegerIndexSerializerTest,
PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
ASSERT_THAT(serializer.PrependData(
&pl_used, IntegerIndexData(/*section_id=*/0,
@@ -90,11 +86,9 @@ TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitFull) {
PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
ASSERT_THAT(serializer.PrependData(
&pl_used, IntegerIndexData(/*section_id=*/0,
@@ -115,11 +109,9 @@ TEST(PostingListIntegerIndexSerializerTest, PrependDataNotFull) {
PostingListIntegerIndexSerializer serializer;
int size = 2551 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// Make used.
IntegerIndexData data0(/*section_id=*/0, /*document_id=*/0, /*key=*/2);
@@ -153,11 +145,9 @@ TEST(PostingListIntegerIndexSerializerTest, PrependDataAlmostFull) {
PostingListIntegerIndexSerializer serializer;
int size = 4 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// Fill up the compressed region.
// Transitions:
@@ -197,11 +187,9 @@ TEST(PostingListIntegerIndexSerializerTest, PrependDataPostingListUsedMinSize) {
PostingListIntegerIndexSerializer serializer;
int size = serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
// PL State: EMPTY
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
@@ -235,11 +223,9 @@ TEST(PostingListIntegerIndexSerializerTest,
PostingListIntegerIndexSerializer serializer;
int size = 6 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_in;
std::vector<IntegerIndexData> data_pushed;
@@ -315,11 +301,9 @@ TEST(PostingListIntegerIndexSerializerTest, PrependDataArrayKeepPrepended) {
PostingListIntegerIndexSerializer serializer;
int size = 6 * sizeof(IntegerIndexData);
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_in;
std::vector<IntegerIndexData> data_pushed;
@@ -371,11 +355,9 @@ TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
PostingListIntegerIndexSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf1.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr1 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
@@ -385,11 +367,9 @@ TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
/*keep_prepended=*/false),
Eq(data_arr1.size()));
- std::unique_ptr<char[]> buf2 = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf2.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr2 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0),
IntegerIndexData(/*section_id=*/0, /*document_id=*/3, /*key=*/-3),
@@ -413,11 +393,9 @@ TEST(PostingListIntegerIndexSerializerTest,
PostingListIntegerIndexSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5)};
@@ -443,11 +421,9 @@ TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
PostingListIntegerIndexSerializer serializer;
int size1 = 3 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(size1);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf1.get()), size1));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size1));
std::vector<IntegerIndexData> data_arr1 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5),
@@ -460,11 +436,9 @@ TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
Eq(data_arr1.size()));
int size2 = serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf2 = std::make_unique<char[]>(size2);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf2.get()), size2));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size2));
std::vector<IntegerIndexData> data_arr2 = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/5, /*key=*/-200)};
ASSERT_THAT(
@@ -486,11 +460,9 @@ TEST(PostingListIntegerIndexSerializerTest, PopFrontData) {
PostingListIntegerIndexSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
- std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- &serializer, static_cast<void*>(buf.get()), size));
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
std::vector<IntegerIndexData> data_arr = {
IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
diff --git a/icing/index/section-indexing-handler.h b/icing/index/section-indexing-handler.h
index ff461cb..98efc8f 100644
--- a/icing/index/section-indexing-handler.h
+++ b/icing/index/section-indexing-handler.h
@@ -32,23 +32,32 @@ class SectionIndexingHandler {
virtual ~SectionIndexingHandler() = default;
// Handles the indexing process: add data (hits) into the specific type index
- // (e.g. string index, integer index) for all contents in the corresponding
- // type of sections in tokenized_document.
+ // (e.g. term index, integer index) for all contents in the corresponding type
+ // of sections in tokenized_document.
// For example, IntegerSectionIndexingHandler::Handle should add data into
// integer index for all contents in tokenized_document.integer_sections.
//
+ // Also it should handle last added DocumentId properly (based on
+ // recovery_mode_) to avoid adding previously indexed documents.
+ //
// tokenized_document: document object with different types of tokenized
// sections.
// document_id: id of the document.
+ // recovery_mode: decides how to handle document_id <=
+ // last_added_document_id. If in recovery_mode, then
+ // Handle() will simply return OK immediately. Otherwise,
+ // returns INVALID_ARGUMENT_ERROR.
// put_document_stats: object for collecting stats during indexing. It can be
// nullptr.
//
/// Returns:
// - OK on success
+ // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+ //   document_id of a previously indexed document in non-recovery mode
// - Any other errors. It depends on each implementation.
virtual libtextclassifier3::Status Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) = 0;
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) = 0;
protected:
const Clock& clock_;
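// Sketch of how a caller might drive the new recovery_mode parameter. The
// wrapper below is hypothetical (including its name and the handler list);
// only the Handle() signature and its recovery-mode semantics come from this
// header: with recovery_mode=true, a document whose id is <= the handler's
// last added document id is silently skipped, while with recovery_mode=false
// it is an INVALID_ARGUMENT error.
libtextclassifier3::Status IndexWithHandlers(
    const std::vector<std::unique_ptr<SectionIndexingHandler>>& handlers,
    const TokenizedDocument& tokenized_document, DocumentId document_id,
    bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
  for (const std::unique_ptr<SectionIndexingHandler>& handler : handlers) {
    ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
                                          recovery_mode, put_document_stats));
  }
  return libtextclassifier3::Status::OK;
}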
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index 9a5e299..7cd0909 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -36,18 +36,22 @@ namespace lib {
libtextclassifier3::Status StringSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) {
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (index_.last_added_document_id() != kInvalidDocumentId &&
document_id <= index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"DocumentId %d must be greater than last added document_id %d",
document_id, index_.last_added_document_id()));
}
- // TODO(b/259744228): revisit last_added_document_id with numeric index for
- // index rebuilding before rollout.
index_.set_last_added_document_id(document_id);
+
uint32_t num_tokens = 0;
libtextclassifier3::Status status;
for (const TokenizedSection& section :
diff --git a/icing/index/string-section-indexing-handler.h b/icing/index/string-section-indexing-handler.h
index 4906f97..36f6a05 100644
--- a/icing/index/string-section-indexing-handler.h
+++ b/icing/index/string-section-indexing-handler.h
@@ -38,14 +38,14 @@ class StringSectionIndexingHandler : public SectionIndexingHandler {
~StringSectionIndexingHandler() override = default;
- // Handles the string indexing process: add hits into the lite index for all
- // contents in tokenized_document.tokenized_string_sections and merge lite
+ // Handles the string term indexing process: add hits into the lite index for
+ // all contents in tokenized_document.tokenized_string_sections and merge lite
// index into main index if necessary.
//
/// Returns:
// - OK on success
- // - INVALID_ARGUMENT_ERROR if document_id is less than the document_id of a
- // previously indexed document.
+ // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+ //   document_id of a previously indexed document in non-recovery mode.
// - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add anymore
// content.
// - DATA_LOSS_ERROR if an attempt to merge the index fails and both indices
@@ -54,7 +54,7 @@ class StringSectionIndexingHandler : public SectionIndexingHandler {
// - Any main/lite index errors.
libtextclassifier3::Status Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- PutDocumentStatsProto* put_document_stats) override;
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
const Normalizer& normalizer_;
diff --git a/icing/join/doc-join-info.cc b/icing/join/doc-join-info.cc
new file mode 100644
index 0000000..9bef08a
--- /dev/null
+++ b/icing/join/doc-join-info.cc
@@ -0,0 +1,48 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/doc-join-info.h"
+
+#include <cstdint>
+
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+#include "icing/util/bit-util.h"
+
+namespace icing {
+namespace lib {
+
+DocJoinInfo::DocJoinInfo(DocumentId document_id,
+ JoinablePropertyId joinable_property_id) {
+ value_ = 0;
+ bit_util::BitfieldSet(/*new_value=*/document_id,
+ /*lsb_offset=*/kJoinablePropertyIdBits,
+ /*len=*/kDocumentIdBits, &value_);
+ bit_util::BitfieldSet(/*new_value=*/joinable_property_id,
+ /*lsb_offset=*/0,
+ /*len=*/kJoinablePropertyIdBits, &value_);
+}
+
+DocumentId DocJoinInfo::document_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/kJoinablePropertyIdBits,
+ /*len=*/kDocumentIdBits);
+}
+
+JoinablePropertyId DocJoinInfo::joinable_property_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/0,
+ /*len=*/kJoinablePropertyIdBits);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/doc-join-info.h b/icing/join/doc-join-info.h
new file mode 100644
index 0000000..7696b92
--- /dev/null
+++ b/icing/join/doc-join-info.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_DOC_JOIN_INFO
+#define ICING_JOIN_DOC_JOIN_INFO
+
+#include <cstdint>
+#include <limits>
+
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// DocJoinInfo is composed of document_id and joinable_property_id.
+class DocJoinInfo {
+ public:
+ // The datatype used to encode DocJoinInfo information: the document_id and
+ // joinable_property_id.
+ using Value = uint32_t;
+
+ static_assert(kDocumentIdBits + kJoinablePropertyIdBits <= sizeof(Value) * 8,
+ "Cannot encode document id and joinable property id in "
+ "DocJoinInfo::Value");
+
+ // All bits of kInvalidValue are 1, and it contains:
+ // - 0b1 for 4 unused bits.
+ // - kInvalidDocumentId (2^22-1).
+ // - JoinablePropertyId 2^6-1 (valid), which is ok because kInvalidDocumentId
+ // has already invalidated the value. In fact, we currently use all 2^6
+ // joinable property ids and there is no "invalid joinable property id", so
+ // it doesn't matter what JoinablePropertyId we set for kInvalidValue.
+ static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
+
+ explicit DocJoinInfo(DocumentId document_id,
+ JoinablePropertyId joinable_property_id);
+
+ explicit DocJoinInfo(Value value = kInvalidValue) : value_(value) {}
+
+ bool is_valid() const { return value_ != kInvalidValue; }
+ Value value() const { return value_; }
+ DocumentId document_id() const;
+ JoinablePropertyId joinable_property_id() const;
+
+ private:
+ // Value bits layout: 4 unused + 22 document_id + 6 joinable_property_id.
+ Value value_;
+} __attribute__((packed));
+static_assert(sizeof(DocJoinInfo) == 4, "");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_DOC_JOIN_INFO
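// The packing above is plain bit arithmetic: the low kJoinablePropertyIdBits
// (6) bits hold the joinable property id, the next kDocumentIdBits (22) bits
// hold the document id, and the top 4 bits are unused. A minimal sketch of the
// equivalent encode/decode, assuming the 22/6 split from this header (the real
// class uses bit_util and masks out-of-range inputs):
#include <cassert>
#include <cstdint>

constexpr int kToyDocumentIdBits = 22;
constexpr int kToyJoinablePropertyIdBits = 6;

constexpr uint32_t PackDocJoinInfo(uint32_t document_id,
                                   uint32_t joinable_property_id) {
  return (document_id << kToyJoinablePropertyIdBits) | joinable_property_id;
}

constexpr uint32_t UnpackDocumentId(uint32_t value) {
  return (value >> kToyJoinablePropertyIdBits) &
         ((uint32_t{1} << kToyDocumentIdBits) - 1);
}

constexpr uint32_t UnpackJoinablePropertyId(uint32_t value) {
  return value & ((uint32_t{1} << kToyJoinablePropertyIdBits) - 1);
}

int main() {
  // Same values as DocJoinInfoTest.Accessors below: (24, 5) packs to
  // (24 << 6) + 5 = 1541.
  constexpr uint32_t value =
      PackDocJoinInfo(/*document_id=*/24, /*joinable_property_id=*/5);
  static_assert(value == 1541, "");
  assert(UnpackDocumentId(value) == 24u);
  assert(UnpackJoinablePropertyId(value) == 5u);
  return 0;
}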
diff --git a/icing/join/doc-join-info_test.cc b/icing/join/doc-join-info_test.cc
new file mode 100644
index 0000000..7025473
--- /dev/null
+++ b/icing/join/doc-join-info_test.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/doc-join-info.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+static constexpr DocumentId kSomeDocumentId = 24;
+static constexpr JoinablePropertyId kSomeJoinablePropertyId = 5;
+
+TEST(DocJoinInfoTest, Accessors) {
+ DocJoinInfo doc_join_info(kSomeDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(doc_join_info.document_id(), Eq(kSomeDocumentId));
+ EXPECT_THAT(doc_join_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+}
+
+TEST(DocJoinInfoTest, Invalid) {
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(default_invalid.is_valid(), IsFalse());
+
+ // Also make sure the invalid DocJoinInfo contains an invalid document id.
+ EXPECT_THAT(default_invalid.document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(default_invalid.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+}
+
+TEST(DocJoinInfoTest, Valid) {
+ DocJoinInfo maximum_document_id_info(kMaxDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(maximum_document_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(maximum_document_id_info.document_id(), Eq(kMaxDocumentId));
+ EXPECT_THAT(maximum_document_id_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+
+ DocJoinInfo maximum_joinable_property_id_info(kSomeDocumentId,
+ kMaxJoinablePropertyId);
+ EXPECT_THAT(maximum_joinable_property_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(maximum_joinable_property_id_info.document_id(),
+ Eq(kSomeDocumentId));
+ EXPECT_THAT(maximum_joinable_property_id_info.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+
+ DocJoinInfo minimum_document_id_info(kMinDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(minimum_document_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(minimum_document_id_info.document_id(), Eq(kMinDocumentId));
+ EXPECT_THAT(minimum_document_id_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+
+ DocJoinInfo minimum_joinable_property_id_info(kSomeDocumentId,
+ kMinJoinablePropertyId);
+ EXPECT_THAT(minimum_joinable_property_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(minimum_joinable_property_id_info.document_id(),
+ Eq(kSomeDocumentId));
+ EXPECT_THAT(minimum_joinable_property_id_info.joinable_property_id(),
+ Eq(kMinJoinablePropertyId));
+
+ DocJoinInfo all_maximum_info(kMaxDocumentId, kMaxJoinablePropertyId);
+ EXPECT_THAT(all_maximum_info.is_valid(), IsTrue());
+ EXPECT_THAT(all_maximum_info.document_id(), Eq(kMaxDocumentId));
+ EXPECT_THAT(all_maximum_info.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+
+ DocJoinInfo all_minimum_info(kMinDocumentId, kMinJoinablePropertyId);
+ EXPECT_THAT(all_minimum_info.is_valid(), IsTrue());
+ EXPECT_THAT(all_minimum_info.document_id(), Eq(kMinDocumentId));
+ EXPECT_THAT(all_minimum_info.joinable_property_id(),
+ Eq(kMinJoinablePropertyId));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
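// Why the Invalid test above expects kInvalidDocumentId and
// kMaxJoinablePropertyId: the default value is all ones (0xFFFFFFFF), so the
// 22 document-id bits decode to 2^22 - 1 (kInvalidDocumentId) and the 6
// joinable-property-id bits decode to 2^6 - 1 (kMaxJoinablePropertyId). Worked
// out with the same toy 22/6 split as the sketch after doc-join-info.h:
static_assert(((0xFFFFFFFFu >> 6) & ((1u << 22) - 1)) == (1u << 22) - 1,
              "all-ones value decodes to an invalid document id");
static_assert((0xFFFFFFFFu & ((1u << 6) - 1)) == (1u << 6) - 1,
              "all-ones value decodes to the max joinable property id");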
diff --git a/icing/join/join-children-fetcher_test.cc b/icing/join/join-children-fetcher_test.cc
index 75e9a14..92a7a81 100644
--- a/icing/join/join-children-fetcher_test.cc
+++ b/icing/join/join-children-fetcher_test.cc
@@ -10,6 +10,7 @@
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
+// limitations under the License.
#include "icing/join/join-children-fetcher.h"
diff --git a/icing/join/qualified-id-type-joinable-cache.cc b/icing/join/qualified-id-type-joinable-cache.cc
new file mode 100644
index 0000000..4dc6e5a
--- /dev/null
+++ b/icing/join/qualified-id-type-joinable-cache.cc
@@ -0,0 +1,206 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-type-joinable-cache.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/",
+ QualifiedIdTypeJoinableCache::kFilePrefix, ".m");
+}
+
+std::string GetDocumentToQualifiedIdMapperPath(std::string_view working_path) {
+ return absl_ports::StrCat(
+ working_path, "/", QualifiedIdTypeJoinableCache::kFilePrefix, "_mapper");
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdTypeJoinableCache>>
+QualifiedIdTypeJoinableCache::Create(const Filesystem& filesystem,
+ std::string working_path) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.DirectoryExists(
+ GetDocumentToQualifiedIdMapperPath(working_path).c_str())) {
+ // Discard working_path if any file/directory is missing, and reinitialize.
+ ICING_RETURN_IF_ERROR(
+ PersistentStorage::Discard(filesystem, working_path, kWorkingPathType));
+ return InitializeNewFiles(filesystem, std::move(working_path));
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path));
+}
+
+QualifiedIdTypeJoinableCache::~QualifiedIdTypeJoinableCache() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING) << "Failed to persist qualified id type joinable cache "
+ "to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status QualifiedIdTypeJoinableCache::Put(
+ const DocJoinInfo& doc_join_info, DocumentId ref_document_id) {
+ if (!doc_join_info.is_valid()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot put data for an invalid DocJoinInfo");
+ }
+
+ ICING_RETURN_IF_ERROR(document_to_qualified_id_mapper_->Put(
+ encode_util::EncodeIntToCString(doc_join_info.value()), ref_document_id));
+
+ // TODO(b/263890397): add delete propagation
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<DocumentId> QualifiedIdTypeJoinableCache::Get(
+ const DocJoinInfo& doc_join_info) const {
+ if (!doc_join_info.is_valid()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot get data for an invalid DocJoinInfo");
+ }
+
+ return document_to_qualified_id_mapper_->Get(
+ encode_util::EncodeIntToCString(doc_join_info.value()));
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdTypeJoinableCache>>
+QualifiedIdTypeJoinableCache::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize document_to_qualified_id_mapper
+ // TODO(b/263890397): decide PersistentHashMapKeyMapper size
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<KeyMapper<DocumentId>> document_to_qualified_id_mapper,
+ PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem, GetDocumentToQualifiedIdMapperPath(working_path)));
+
+ // Create instance.
+ auto new_type_joinable_cache = std::unique_ptr<QualifiedIdTypeJoinableCache>(
+ new QualifiedIdTypeJoinableCache(
+ filesystem, std::move(working_path),
+ /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
+ std::move(document_to_qualified_id_mapper)));
+ // Initialize info content.
+ new_type_joinable_cache->info().magic = Info::kMagic;
+ new_type_joinable_cache->info().last_added_document_id = kInvalidDocumentId;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage. Also write them into disk as well.
+ ICING_RETURN_IF_ERROR(new_type_joinable_cache->InitializeNewStorage());
+ ICING_RETURN_IF_ERROR(new_type_joinable_cache->PersistMetadataToDisk());
+
+ return new_type_joinable_cache;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdTypeJoinableCache>>
+QualifiedIdTypeJoinableCache::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path) {
+ // PRead metadata file.
+ auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
+ if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
+ metadata_buffer.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+
+ // Initialize document_to_qualified_id_mapper
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<KeyMapper<DocumentId>> document_to_qualified_id_mapper,
+ PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem, GetDocumentToQualifiedIdMapperPath(working_path)));
+
+ // Create instance.
+ auto type_joinable_cache = std::unique_ptr<QualifiedIdTypeJoinableCache>(
+ new QualifiedIdTypeJoinableCache(
+ filesystem, std::move(working_path), std::move(metadata_buffer),
+ std::move(document_to_qualified_id_mapper)));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(type_joinable_cache->InitializeExistingStorage());
+
+ // Validate magic.
+ if (type_joinable_cache->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return type_joinable_cache;
+}
+
+libtextclassifier3::Status
+QualifiedIdTypeJoinableCache::PersistMetadataToDisk() {
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+
+ ScopedFd sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open metadata file for write");
+ }
+
+ if (!filesystem_.PWrite(sfd.get(), /*offset=*/0, metadata_buffer_.get(),
+ kMetadataFileSize)) {
+ return absl_ports::InternalError("Fail to write metadata file");
+ }
+
+ if (!filesystem_.DataSync(sfd.get())) {
+ return absl_ports::InternalError("Fail to sync metadata to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status
+QualifiedIdTypeJoinableCache::PersistStoragesToDisk() {
+ return document_to_qualified_id_mapper_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdTypeJoinableCache::ComputeInfoChecksum() {
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdTypeJoinableCache::ComputeStoragesChecksum() {
+ return document_to_qualified_id_mapper_->ComputeChecksum();
+}
+
+} // namespace lib
+} // namespace icing
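// Typical lifecycle of the cache implemented above, mirroring the tests that
// follow: create (or re-open) the cache under a working path, Put mappings
// from DocJoinInfo to the referenced DocumentId, persist, and Get them back.
// A sketch assuming the same headers and macros as the test file below; the
// filesystem object and the working path are illustrative only.
ICING_ASSERT_OK_AND_ASSIGN(
    std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
    QualifiedIdTypeJoinableCache::Create(
        filesystem, "/tmp/qualified_id_type_joinable_cache_example"));
// Internally the key is encode_util::EncodeIntToCString(doc_join_info.value())
// and the mapped value is the referenced DocumentId.
ICING_ASSERT_OK(
    cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
               /*ref_document_id=*/0));
ICING_ASSERT_OK(cache->PersistToDisk());
EXPECT_THAT(
    cache->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
    IsOkAndHolds(0));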
diff --git a/icing/join/qualified-id-type-joinable-cache.h b/icing/join/qualified-id-type-joinable-cache.h
new file mode 100644
index 0000000..08f6455
--- /dev/null
+++ b/icing/join/qualified-id-type-joinable-cache.h
@@ -0,0 +1,202 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_TYPE_JOINABLE_CACHE_H_
+#define ICING_JOIN_QUALIFIED_ID_TYPE_JOINABLE_CACHE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdTypeJoinableCache: a class to maintain cache data mapping
+// DocJoinInfo to joinable qualified ids and delete propagation info.
+class QualifiedIdTypeJoinableCache : public PersistentStorage {
+ public:
+ struct Info {
+ static constexpr int32_t kMagic = 0x48cabdc6;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 8, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+ static constexpr int32_t kInfoMetadataBufferOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "qualified_id_joinable_cache";
+
+ // Creates a QualifiedIdTypeJoinableCache instance to store qualified ids for
+ // future joining search. If any of the underlying files is missing, then
+ // delete the whole working_path and (re)initialize with new ones. Otherwise,
+ // initialize and create the instance from the existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // QualifiedIdTypeJoinableCache uses working path as working
+ // directory and all related files will be stored under this
+ //               directory. It takes full ownership of working_path_,
+ // including creation/deletion. It is the caller's
+ // responsibility to specify correct working path and avoid
+ // mixing different persistent storages together under the same
+ // path. Also the caller has the ownership for the parent
+ // directory of working_path_, and it is responsible for parent
+ // directory creation/deletion. See PersistentStorage for more
+ // details about the concept of working_path.
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum
+ // - INTERNAL_ERROR on I/O errors
+ // - Any KeyMapper errors
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdTypeJoinableCache>>
+ Create(const Filesystem& filesystem, std::string working_path);
+
+ // Delete copy and move constructor/assignment operator.
+ QualifiedIdTypeJoinableCache(const QualifiedIdTypeJoinableCache&) = delete;
+ QualifiedIdTypeJoinableCache& operator=(const QualifiedIdTypeJoinableCache&) =
+ delete;
+
+ QualifiedIdTypeJoinableCache(QualifiedIdTypeJoinableCache&&) = delete;
+ QualifiedIdTypeJoinableCache& operator=(QualifiedIdTypeJoinableCache&&) =
+ delete;
+
+ ~QualifiedIdTypeJoinableCache() override;
+
+ // Puts a new data into cache: DocJoinInfo (DocumentId, JoinablePropertyId)
+ // references to ref_document_id.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - Any KeyMapper errors
+ libtextclassifier3::Status Put(const DocJoinInfo& doc_join_info,
+ DocumentId ref_document_id);
+
+ // Gets the referenced DocumentId by DocJoinInfo.
+ //
+ // Returns:
+ // - DocumentId referenced by the given DocJoinInfo (DocumentId,
+ // JoinablePropertyId) on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - NOT_FOUND_ERROR if doc_join_info doesn't exist
+ // - Any KeyMapper errors
+ libtextclassifier3::StatusOr<DocumentId> Get(
+ const DocJoinInfo& doc_join_info) const;
+
+ private:
+ explicit QualifiedIdTypeJoinableCache(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<uint8_t[]> metadata_buffer,
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper)
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ metadata_buffer_(std::move(metadata_buffer)),
+ document_to_qualified_id_mapper_(std::move(key_mapper)) {}
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdTypeJoinableCache>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path);
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdTypeJoinableCache>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path);
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk() override;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk() override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+
+ // Computes and returns all storages checksum.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ // Metadata buffer
+ std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+ // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
+ // JoinablePropertyId) to another referenced DocumentId (converted from
+ // qualified id string).
+ std::unique_ptr<KeyMapper<DocumentId>> document_to_qualified_id_mapper_;
+
+ // TODO(b/263890397): add delete propagation storage
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_TYPE_JOINABLE_CACHE_H_
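// Metadata layout recap for the header above: the file is <Crcs><Info>, so the
// Crcs section starts at offset 0 and the Info section at sizeof(Crcs). With
// sizeof(Info) == 8 and kMetadataFileSize == 20, sizeof(Crcs) is 12 (all_crc
// plus the two component crcs checked in the tests below). A sketch of
// locating both sections in a metadata buffer, assuming the same headers as
// the test file that follows and that the buffer was already PRead from disk:
auto metadata_buffer = std::make_unique<uint8_t[]>(
    QualifiedIdTypeJoinableCache::kMetadataFileSize);
// ... fill metadata_buffer from the ".m" file under the working path ...
const auto* crcs = reinterpret_cast<const PersistentStorage::Crcs*>(
    metadata_buffer.get() +
    QualifiedIdTypeJoinableCache::kCrcsMetadataBufferOffset);
const auto* info = reinterpret_cast<const QualifiedIdTypeJoinableCache::Info*>(
    metadata_buffer.get() +
    QualifiedIdTypeJoinableCache::kInfoMetadataBufferOffset);
// A consistent file has info->magic == Info::kMagic, crcs->component_crcs
// matching the Info and storage contents, and crcs->all_crc covering the
// component crcs, exactly as verified in the tests below.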
diff --git a/icing/join/qualified-id-type-joinable-cache_test.cc b/icing/join/qualified-id-type-joinable-cache_test.cc
new file mode 100644
index 0000000..088c878
--- /dev/null
+++ b/icing/join/qualified-id-type-joinable-cache_test.cc
@@ -0,0 +1,496 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-type-joinable-cache.h"
+
+#include <memory>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsTrue;
+using ::testing::Ne;
+using ::testing::Not;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = QualifiedIdTypeJoinableCache::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+
+class QualifiedIdTypeJoinableCacheTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/qualified_id_type_joinable_cache_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
+TEST_F(QualifiedIdTypeJoinableCacheTest, InvalidWorkingPath) {
+ EXPECT_THAT(
+ QualifiedIdTypeJoinableCache::Create(
+ filesystem_, "/dev/null/qualified_id_type_joinable_cache_test"),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest, InitializeNewFiles) {
+ {
+ // Create new qualified id type joinable cache
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ ICING_ASSERT_OK(cache->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", QualifiedIdTypeJoinableCache::kFilePrefix, ".m");
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdTypeJoinableCache::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Check info section
+ const Info* info = reinterpret_cast<const Info*>(
+ metadata_buffer.get() +
+ QualifiedIdTypeJoinableCache::kInfoMetadataBufferOffset);
+ EXPECT_THAT(info->magic, Eq(Info::kMagic));
+ EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
+
+ // Check crcs section
+ const Crcs* crcs = reinterpret_cast<const Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdTypeJoinableCache::kCrcsMetadataBufferOffset);
+ // There is some initial data in the KeyMapper, so storages_crc should be
+ // non-zero.
+ EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs->component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs->all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs->component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_document_id=*/2));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_document_id=*/4));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ InitializationShouldSucceedWithPersistToDisk) {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache1,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ cache1->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+ ICING_ASSERT_OK(
+ cache1->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_document_id=*/2));
+ ICING_ASSERT_OK(
+ cache1->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_document_id=*/4));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(cache1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache2,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ EXPECT_THAT(
+ cache2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
+ IsOkAndHolds(0));
+ EXPECT_THAT(
+ cache2->Get(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20)),
+ IsOkAndHolds(2));
+ EXPECT_THAT(
+ cache2->Get(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20)),
+ IsOkAndHolds(4));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ InitializationShouldSucceedAfterDestruction) {
+ {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_document_id=*/2));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_document_id=*/4));
+ }
+
+ {
+ // The previous instance went out of scope and was destructed. Although we
+ // didn't call PersistToDisk explicitly, the destructor should invoke it and
+ // thus initializing another instance on the same files should succeed, and
+ // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ EXPECT_THAT(cache->Get(DocJoinInfo(/*document_id=*/1,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds(0));
+ EXPECT_THAT(cache->Get(DocJoinInfo(/*document_id=*/3,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds(2));
+ EXPECT_THAT(cache->Get(DocJoinInfo(/*document_id=*/5,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds(4));
+ }
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+
+ ICING_ASSERT_OK(cache->PersistToDisk());
+ }
+
+ {
+ // Manually change magic and update checksum
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", QualifiedIdTypeJoinableCache::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdTypeJoinableCache::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually change magic and update checksums.
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdTypeJoinableCache::kCrcsMetadataBufferOffset);
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdTypeJoinableCache::kInfoMetadataBufferOffset);
+ info->magic += kCorruptedValueOffset;
+ crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+ crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id type joinable cache with different
+ // magic. This should fail.
+ EXPECT_THAT(QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Incorrect magic value")));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+
+ ICING_ASSERT_OK(cache->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", QualifiedIdTypeJoinableCache::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdTypeJoinableCache::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually corrupt all_crc
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdTypeJoinableCache::kCrcsMetadataBufferOffset);
+ crcs->all_crc += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id type joinable cache with metadata
+ // containing corrupted all_crc. This should fail.
+ EXPECT_THAT(QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid all crc")));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+
+ ICING_ASSERT_OK(cache->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", QualifiedIdTypeJoinableCache::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdTypeJoinableCache::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdTypeJoinableCache::kInfoMetadataBufferOffset);
+ info->last_added_document_id += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdTypeJoinableCache::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id type joinable cache with info that
+ // doesn't match its checksum. This should fail.
+ EXPECT_THAT(QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid info crc")));
+}
+
+TEST_F(
+ QualifiedIdTypeJoinableCacheTest,
+ InitializeExistingFilesWithCorruptedDocumentToQualifiedIdMapperShouldFail) {
+ {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ ICING_ASSERT_OK(
+ cache->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_document_id=*/0));
+
+ ICING_ASSERT_OK(cache->PersistToDisk());
+ }
+
+ {
+ // Corrupt document_to_qualified_id_mapper manually.
+ std::string mapper_working_path = absl_ports::StrCat(
+ working_path_, "/", QualifiedIdTypeJoinableCache::kFilePrefix,
+ "_mapper");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMapKeyMapper<DocumentId>> mapper,
+ PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem_, std::move(mapper_working_path)));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
+ ICING_ASSERT_OK(mapper->Put("foo", 12345));
+ ICING_ASSERT_OK(mapper->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, mapper->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ // Attempt to create the qualified id type joinable cache with corrupted
+ // document_to_qualified_id_mapper. This should fail.
+ EXPECT_THAT(QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest, InvalidPut) {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(cache->Put(default_invalid, /*ref_document_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest, InvalidGet) {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(cache->Get(default_invalid),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest, PutAndGet) {
+ DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
+ DocumentId ref_document1 = 0;
+
+ DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/13);
+ DocumentId ref_document2 = 2;
+
+ DocJoinInfo target_info3(/*document_id=*/4, /*joinable_property_id=*/4);
+ DocumentId ref_document3 = ref_document1;
+
+ {
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ EXPECT_THAT(cache->Put(target_info1, /*ref_document_id=*/ref_document1),
+ IsOk());
+ EXPECT_THAT(cache->Put(target_info2, /*ref_document_id=*/ref_document2),
+ IsOk());
+ EXPECT_THAT(cache->Put(target_info3, /*ref_document_id=*/ref_document3),
+ IsOk());
+
+ EXPECT_THAT(cache->Get(target_info1), IsOkAndHolds(ref_document1));
+ EXPECT_THAT(cache->Get(target_info2), IsOkAndHolds(ref_document2));
+ EXPECT_THAT(cache->Get(target_info3), IsOkAndHolds(ref_document3));
+
+ ICING_ASSERT_OK(cache->PersistToDisk());
+ }
+
+ // Verify we can get all of them after destructing and re-initializing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+ EXPECT_THAT(cache->Get(target_info1), IsOkAndHolds(ref_document1));
+ EXPECT_THAT(cache->Get(target_info2), IsOkAndHolds(ref_document2));
+ EXPECT_THAT(cache->Get(target_info3), IsOkAndHolds(ref_document3));
+}
+
+TEST_F(QualifiedIdTypeJoinableCacheTest,
+ GetShouldReturnNotFoundErrorIfNotExist) {
+ DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20);
+ DocumentId ref_document = 0;
+
+ // Create new qualified id type joinable cache
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableCache> cache,
+ QualifiedIdTypeJoinableCache::Create(filesystem_, working_path_));
+
+ // Verify entry is not found in the beginning.
+ EXPECT_THAT(cache->Get(target_info),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK(cache->Put(target_info, /*ref_document_id=*/ref_document));
+ ASSERT_THAT(cache->Get(target_info), IsOkAndHolds(ref_document));
+
+ // Get another non-existing entry. This should get NOT_FOUND_ERROR.
+ DocJoinInfo another_target_info(/*document_id=*/2,
+ /*joinable_property_id=*/20);
+ EXPECT_THAT(cache->Get(another_target_info),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/function.cc b/icing/query/advanced_query_parser/function.cc
new file mode 100644
index 0000000..e7938db
--- /dev/null
+++ b/icing/query/advanced_query_parser/function.cc
@@ -0,0 +1,77 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/function.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/*static*/ libtextclassifier3::StatusOr<Function> Function::Create(
+ DataType return_type, std::string name, std::vector<Param> params,
+ Function::EvalFunction eval) {
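+ // Validate the declared params: a kVariable param (if any) must be the last
+ // param, and no kRequired param may follow a kOptional param.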
+ bool has_had_optional = false;
+ for (int i = 0; i < params.size(); ++i) {
+ switch (params.at(i).cardinality) {
+ case Cardinality::kVariable:
+ if (i != params.size() - 1) {
+ return absl_ports::InvalidArgumentError(
+ "Can only specify a variable param as the final param.");
+ }
+ break;
+ case Cardinality::kOptional:
+ has_had_optional = true;
+ break;
+ case Cardinality::kRequired:
+ if (has_had_optional) {
+ return absl_ports::InvalidArgumentError(
+ "Can't specify optional params followed by required params.");
+ }
+ break;
+ }
+ }
+ return Function(return_type, std::move(name), std::move(params),
+ std::move(eval));
+}
+
+libtextclassifier3::StatusOr<PendingValue> Function::Eval(
+ std::vector<PendingValue>&& args) const {
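+ // Walk params and args in parallel: each arg must match its param, extra
+ // args are only allowed when the final param is kVariable, and args may be
+ // omitted only for trailing kOptional/kVariable params.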
+ for (int i = 0; i < params_.size() || i < args.size(); ++i) {
+ if (i < args.size() && i < params_.size()) {
+ ICING_RETURN_IF_ERROR(params_.at(i).Matches(args.at(i)));
+ } else if (i >= params_.size()) {
+ // There are remaining args. This is only valid if the final param is
+ // kVariable.
+ if (params_.empty() ||
+ params_.rbegin()->cardinality != Cardinality::kVariable) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Expected to find only ", std::to_string(params_.size()),
+ " arguments, but found ", std::to_string(args.size())));
+ }
+ ICING_RETURN_IF_ERROR(params_.rbegin()->Matches(args.at(i)));
+ } else if (params_.at(i).cardinality == Cardinality::kRequired) {
+ // There are no more args, but there are still params to check. If
+ // these params are kRequired, then there is an error.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Expected to find ", std::to_string(i + 1), "th argument, but only ",
+ std::to_string(args.size()), " arguments provided."));
+ }
+ }
+ return eval_(std::move(args));
+}
+
+} // namespace lib
+} // namespace icing
\ No newline at end of file
diff --git a/icing/query/advanced_query_parser/function.h b/icing/query/advanced_query_parser/function.h
new file mode 100644
index 0000000..3514878
--- /dev/null
+++ b/icing/query/advanced_query_parser/function.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+
+namespace icing {
+namespace lib {
+
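+// A function that can be invoked from the advanced query language. Create()
+// validates the declared param list, and Eval() checks the provided args
+// against those params before delegating to the EvalFunction.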
+class Function {
+ public:
+ using EvalFunction = std::function<libtextclassifier3::StatusOr<PendingValue>(
+ std::vector<PendingValue>&&)>;
+
+ static libtextclassifier3::StatusOr<Function> Create(
+ DataType return_type, std::string name, std::vector<Param> params,
+ EvalFunction eval);
+
+ Function(const Function& rhs) = default;
+ Function(Function&& rhs) = default;
+
+ Function& operator=(const Function& rhs) = default;
+ Function& operator=(Function&& rhs) = default;
+
+ const std::string& name() const { return name_; }
+
+ libtextclassifier3::StatusOr<PendingValue> Eval(
+ std::vector<PendingValue>&& args) const;
+
+ private:
+ Function(DataType return_type, std::string name, std::vector<Param> params,
+ EvalFunction eval)
+ : name_(std::move(name)),
+ params_(std::move(params)),
+ eval_(std::move(eval)),
+ return_type_(return_type) {}
+
+ std::string name_;
+ std::vector<Param> params_;
+ EvalFunction eval_;
+ DataType return_type_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
diff --git a/icing/query/advanced_query_parser/function_test.cc b/icing/query/advanced_query_parser/function_test.cc
new file mode 100644
index 0000000..f9aaed6
--- /dev/null
+++ b/icing/query/advanced_query_parser/function_test.cc
@@ -0,0 +1,308 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/function.h"
+
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::IsTrue;
+
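+// An EvalFunction stand-in that ignores its args and returns a placeholder
+// PendingValue, letting these tests focus on param/arg matching.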
+struct TrivialEval {
+ libtextclassifier3::StatusOr<PendingValue> operator()(
+ const std::vector<PendingValue>&) const {
+ return PendingValue();
+ }
+};
+
+TEST(FunctionTest, NoParamCreateSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(/*return_type=*/DataType::kString,
+ "foo", /*params=*/{}, TrivialEval()));
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, NoParamNonEmptyArgsFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(/*return_type=*/DataType::kString,
+ "foo", /*params=*/{}, TrivialEval()));
+
+ // foo(<one arg>): passing an argument to a zero-param function should fail.
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue());
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamNotWrongTypeFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+ // foo(bar)
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateTextPendingValue("bar"));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamRequiredArgSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+
+ // foo("bar")
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, ParamRequiredArgNotPresentFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ EXPECT_THAT(function.Eval(std::move(empty_args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamOptionalArgNotPresentSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, ParamVariableArgNotPresentSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsTrailingOptionalSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo("bar")
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar", "baz")
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ args.push_back(PendingValue::CreateStringPendingValue("baz"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsTrailingVariableSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo("bar")
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar", "baz")
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ args.push_back(PendingValue::CreateStringPendingValue("baz"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar", "baz", "bat")
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ args.push_back(PendingValue::CreateStringPendingValue("baz"));
+ args.push_back(PendingValue::CreateStringPendingValue("bat"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeRequiredFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kString, Cardinality::kRequired)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeOptionalSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kText, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar")
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar", baz)
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ args.push_back(PendingValue::CreateTextPendingValue("baz"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo(baz)
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue("baz"));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeVariableSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kText, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar")
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar", baz)
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ args.push_back(PendingValue::CreateTextPendingValue("baz"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo("bar", baz, bat)
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue("bar"));
+ args.push_back(PendingValue::CreateTextPendingValue("baz"));
+ args.push_back(PendingValue::CreateTextPendingValue("bat"));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ // foo(baz)
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue("baz"));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // foo(baz, bat)
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue("baz"));
+ args.push_back(PendingValue::CreateTextPendingValue("bat"));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeRequiredFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kRequired)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeOptionalFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeVariableFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
\ No newline at end of file
diff --git a/icing/query/advanced_query_parser/param.h b/icing/query/advanced_query_parser/param.h
new file mode 100644
index 0000000..69c46be
--- /dev/null
+++ b/icing/query/advanced_query_parser/param.h
@@ -0,0 +1,57 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class Cardinality {
+ kRequired,
+ kOptional,
+ kVariable,
+};
+
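+// A declared parameter of a Function. As an illustrative example, a function
+// taking one required string followed by any number of text args would
+// declare its params as
+//   {Param(DataType::kString),
+//    Param(DataType::kText, Cardinality::kVariable)}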
+struct Param {
+ explicit Param(DataType data_type,
+ Cardinality cardinality = Cardinality::kRequired)
+ : data_type(data_type), cardinality(cardinality) {}
+
+ libtextclassifier3::Status Matches(PendingValue& arg) const {
+ bool matches = arg.data_type() == data_type;
+ // Values of type kText could also potentially be valid kLong values. If
+ // we're expecting a kLong and we have a kText, try to parse it as a kLong.
+ if (!matches && data_type == DataType::kLong &&
+ arg.data_type() == DataType::kText) {
+ ICING_RETURN_IF_ERROR(arg.ParseInt());
+ matches = true;
+ }
+ return matches ? libtextclassifier3::Status::OK
+ : absl_ports::InvalidArgumentError(
+ "Provided arg doesn't match required param type.");
+ }
+
+ DataType data_type;
+ Cardinality cardinality;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
diff --git a/icing/query/advanced_query_parser/pending-value.cc b/icing/query/advanced_query_parser/pending-value.cc
new file mode 100644
index 0000000..306812d
--- /dev/null
+++ b/icing/query/advanced_query_parser/pending-value.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/pending-value.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status PendingValue::ParseInt() {
+ if (data_type_ == DataType::kLong) {
+ return libtextclassifier3::Status::OK;
+ } else if (data_type_ != DataType::kText) {
+ return absl_ports::InvalidArgumentError("Cannot parse value as LONG");
+ }
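+ // data_type_ is kText; try to parse string_vals_[0] as a base-10 int64.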
+ char* value_end;
+ long_val_ = std::strtoll(string_vals_.at(0).c_str(), &value_end, /*base=*/10);
+ if (value_end != string_vals_.at(0).c_str() + string_vals_.at(0).length()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Unable to parse \"", string_vals_.at(0), "\" as number."));
+ }
+ data_type_ = DataType::kLong;
+ string_vals_.clear();
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/pending-value.h b/icing/query/advanced_query_parser/pending-value.h
new file mode 100644
index 0000000..8a8704d
--- /dev/null
+++ b/icing/query/advanced_query_parser/pending-value.h
@@ -0,0 +1,152 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class DataType {
+ kNone,
+ kLong,
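+ // Values of type kText may be consumed as properties, numbers or converted
+ // to DocHitInfoIterators further upstream.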
+ kText,
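+ // Values of type kString will eventually be converted to a
+ // DocHitInfoIterator further upstream.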
+ kString,
+ kStringList,
+ kDocumentIterator,
+};
+
+// A holder for intermediate results when processing child nodes.
+struct PendingValue {
+ static PendingValue CreateStringPendingValue(std::string str) {
+ return PendingValue(std::move(str), DataType::kString);
+ }
+
+ static PendingValue CreateTextPendingValue(std::string text) {
+ return PendingValue(std::move(text), DataType::kText);
+ }
+
+ PendingValue() : data_type_(DataType::kNone) {}
+
+ explicit PendingValue(std::unique_ptr<DocHitInfoIterator> iterator)
+ : iterator_(std::move(iterator)),
+ data_type_(DataType::kDocumentIterator) {}
+
+ explicit PendingValue(std::vector<std::string> string_lists)
+ : string_vals_(std::move(string_lists)),
+ data_type_(DataType::kStringList) {}
+
+ PendingValue(const PendingValue&) = delete;
+ PendingValue(PendingValue&&) = default;
+
+ PendingValue& operator=(const PendingValue&) = delete;
+ PendingValue& operator=(PendingValue&&) = default;
+
+ // Placeholder is used to indicate where the children of a particular node
+ // begin.
+ bool is_placeholder() const { return data_type_ == DataType::kNone; }
+
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ iterator() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kDocumentIterator));
+ return std::move(iterator_);
+ }
+
+ libtextclassifier3::StatusOr<const std::vector<std::string>*> string_vals()
+ const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kStringList));
+ return &string_vals_;
+ }
+ libtextclassifier3::StatusOr<std::vector<std::string>> string_vals() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kStringList));
+ return std::move(string_vals_);
+ }
+
+ libtextclassifier3::StatusOr<const std::string*> string_val() const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kString));
+ return &string_vals_.at(0);
+ }
+ libtextclassifier3::StatusOr<std::string> string_val() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kString));
+ return std::move(string_vals_.at(0));
+ }
+
+ libtextclassifier3::StatusOr<const std::string*> text_val() const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kText));
+ return &string_vals_.at(0);
+ }
+ libtextclassifier3::StatusOr<std::string> text_val() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kText));
+ return std::move(string_vals_.at(0));
+ }
+
+ libtextclassifier3::StatusOr<int64_t> long_val() {
+ ICING_RETURN_IF_ERROR(ParseInt());
+ return long_val_;
+ }
+
+ // Attempts to interpret the value as an int. A pending value can be parsed as
+ // an int under two circumstances:
+ // 1. It holds a kText value which can be parsed to an int
+ // 2. It holds a kLong value
+ // If #1 is true, then the parsed value will be stored in long_val_ and
+ // data_type_ will be updated to kLong.
+ // RETURNS:
+ // - OK, if able to successfully parse the value into a long
+ // - INVALID_ARGUMENT if the value could not be parsed as a long
+ libtextclassifier3::Status ParseInt();
+
+ DataType data_type() const { return data_type_; }
+
+ private:
+ explicit PendingValue(std::string str, DataType data_type)
+ : string_vals_({std::move(str)}), data_type_(data_type) {}
+
+ libtextclassifier3::Status CheckDataType(DataType required_data_type) const {
+ if (data_type_ == required_data_type) {
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unable to retrieve value of type '",
+ std::to_string(static_cast<int>(required_data_type)),
+ "' from pending value of type '",
+ std::to_string(static_cast<int>(data_type_)), "'"));
+ }
+
+ // iterator_ will be populated when data_type_ is kDocumentIterator.
+ std::unique_ptr<DocHitInfoIterator> iterator_;
+
+ // string_vals_ will be populated when data_type_ is kString, kText and
+ // kStringList.
+ std::vector<std::string> string_vals_;
+
+ // long_val_ will be populated when data_type_ is kLong - after a successful
+ // call to ParseInt.
+ int64_t long_val_;
+ DataType data_type_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
index 659ad7b..71e1c7c 100644
--- a/icing/query/advanced_query_parser/query-visitor.cc
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -14,22 +14,33 @@
#include "icing/query/advanced_query_parser/query-visitor.h"
+#include <algorithm>
#include <cstdint>
#include <cstdlib>
+#include <iterator>
#include <limits>
#include <memory>
+#include <set>
+#include <string>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+#include "icing/index/iterator/doc-hit-info-iterator-none.h"
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/query-features.h"
-#include "icing/schema/section-manager.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/tokenizer.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -37,7 +48,20 @@ namespace lib {
namespace {
-libtextclassifier3::StatusOr<std::string> EscapeStringValue(
+struct CreateList {
+ libtextclassifier3::StatusOr<PendingValue> operator()(
+ std::vector<PendingValue>&& args) const {
+ std::vector<std::string> values;
+ values.reserve(args.size());
+ for (PendingValue& arg : args) {
+ std::string val = std::move(arg).string_val().ValueOrDie();
+ values.push_back(std::move(val));
+ }
+ return PendingValue(std::move(values));
+ }
+};
+
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
std::string_view value) {
std::string result;
bool in_escape = false;
@@ -105,26 +129,66 @@ libtextclassifier3::StatusOr<Int64Range> GetInt64Range(
} // namespace
+void QueryVisitor::PendingPropertyRestricts::AddValidRestricts(
+ std::set<std::string> new_restricts) {
+ if (!has_active_property_restricts()) {
+ pending_property_restricts_.push_back(std::move(new_restricts));
+ return;
+ }
+
+ // There is an active property restrict already in effect. To determine the
+ // updated active property restrict being applied at this level, we need to
+ // calculate the intersection of new_restricts and
+ // active_property_restricts.
+ const std::set<std::string>& active_restricts = active_property_restricts();
+ auto active_restricts_itr = active_restricts.begin();
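+ // Both sets are sorted, so a single linear merge pass can compute the
+ // intersection in place by erasing non-shared elements from new_restricts.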
+ for (auto new_restricts_itr = new_restricts.begin();
+ new_restricts_itr != new_restricts.end();) {
+ while (active_restricts_itr != active_restricts.end() &&
+ *active_restricts_itr < *new_restricts_itr) {
+ // active_restricts_itr is behind new_restricts_itr; advance it.
+ ++active_restricts_itr;
+ }
+ if (active_restricts_itr == active_restricts.end()) {
+ // There's nothing left in active restricts. Everything at
+ // new_restricts_itr and beyond should be removed.
+ new_restricts_itr =
+ new_restricts.erase(new_restricts_itr, new_restricts.end());
+ } else if (*active_restricts_itr > *new_restricts_itr) {
+ // new_restricts_itr points to an element not present in active_restricts;
+ // erase it.
+ new_restricts_itr = new_restricts.erase(new_restricts_itr);
+ } else {
+ // the element that new_restricts_itr points to is also present in
+ // active_restricts; keep it.
+ ++new_restricts_itr;
+ }
+ }
+ pending_property_restricts_.push_back(std::move(new_restricts));
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
QueryVisitor::CreateTermIterator(const std::string& term) {
if (!processing_not_) {
// 1. Add term to property_query_terms_map
- auto property_restrict_or = GetPropertyRestrict();
- if (property_restrict_or.ok()) {
- std::string property_restrict =
- std::move(property_restrict_or).ValueOrDie();
- property_query_terms_map_[std::move(property_restrict)].insert(term);
+ if (pending_property_restricts_.has_active_property_restricts()) {
+ for (const std::string& property_restrict :
+ pending_property_restricts_.active_property_restricts()) {
+ property_query_terms_map_[property_restrict].insert(term);
+ }
} else {
- ICING_LOG(DBG) << "Unsatisfiable property restrict, "
- << property_restrict_or.status().error_message();
+ property_query_terms_map_[""].insert(term);
}
// 2. If needed add term iterator to query_term_iterators_ map.
if (needs_term_frequency_info_) {
+ // TODO(b/152934343) Save "term start index" into Node and PendingValue and
+ // pass it into index.GetIterator
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> term_iterator,
- index_.GetIterator(term, kSectionIdMaskAll, match_type_,
- needs_term_frequency_info_));
+ index_.GetIterator(term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ match_type_, needs_term_frequency_info_));
query_term_iterators_[term] = std::make_unique<DocHitInfoIteratorFilter>(
std::move(term_iterator), &document_store_, &schema_store_,
filter_options_);
@@ -133,43 +197,133 @@ QueryVisitor::CreateTermIterator(const std::string& term) {
// 3. Add the term iterator.
// TODO(b/208654892): Add support for the prefix operator (*).
- return index_.GetIterator(term, kSectionIdMaskAll, match_type_,
- needs_term_frequency_info_);
+ // TODO(b/152934343) Save "term start index" into Node and PendingValue and
+ // pass it into index.GetIterator
+ return index_.GetIterator(term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ match_type_, needs_term_frequency_info_);
+}
+
+void QueryVisitor::RegisterFunctions() {
+ // std::vector<std::string> createList(std::string...);
+ Function create_list_function =
+ Function::Create(DataType::kStringList, "createList",
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kVariable)},
+ CreateList())
+ .ValueOrDie();
+ registered_functions_.insert(
+ {create_list_function.name(), std::move(create_list_function)});
+
+ // DocHitInfoIterator search(std::string);
+ // DocHitInfoIterator search(std::string, std::vector<std::string>);
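+ // e.g. search("foo bar", createList("prop1"))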
+ Function::EvalFunction search_eval =
+ std::bind(&QueryVisitor::SearchFunction, this, std::placeholders::_1);
+ Function search_function =
+ Function::Create(DataType::kDocumentIterator, "search",
+ {Param(DataType::kString),
+ Param(DataType::kStringList, Cardinality::kOptional)},
+ std::move(search_eval))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {search_function.name(), std::move(search_function)});
+}
+
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
+ std::vector<PendingValue>&& args) {
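+ // Parses the query string in args[0] with a nested QueryVisitor, applying
+ // any property restricts provided in args[1] to the resulting iterator.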
+ // The second arg (if present) is a list of sections to restrict to.
+ if (args.size() == 2) {
+ std::set<std::string> new_restricts;
+ std::vector<std::string> property_restricts =
+ std::move(args.at(1)).string_vals().ValueOrDie();
+ for (std::string& property_restrict : property_restricts) {
+ new_restricts.insert(std::move(property_restrict));
+ }
+ pending_property_restricts_.AddValidRestricts(std::move(new_restricts));
+ if (pending_property_restricts_.active_property_restricts().empty()) {
+ pending_property_restricts_.PopRestricts();
+ return PendingValue(std::make_unique<DocHitInfoIteratorNone>());
+ }
+ }
+
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const std::string* query = args.at(0).string_val().ValueOrDie();
+ Lexer lexer(*query, Lexer::Language::QUERY);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ std::unique_ptr<DocHitInfoIterator> iterator;
+ QueryResults query_result;
+ if (tree_root == nullptr) {
+ iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_.last_added_document_id());
+ } else {
+ QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
+ &schema_store_, &normalizer_, &tokenizer_,
+ filter_options_, match_type_,
+ needs_term_frequency_info_,
+ pending_property_restricts_, processing_not_);
+ tree_root->Accept(&query_visitor);
+ ICING_ASSIGN_OR_RETURN(query_result,
+ std::move(query_visitor).ConsumeResults());
+ iterator = std::move(query_result.root_iterator);
+ }
+
+ // Update members based on results of processing the query.
+ if (args.size() == 2 &&
+ pending_property_restricts_.has_active_property_restricts()) {
+ iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(iterator), &document_store_, &schema_store_,
+ pending_property_restricts_.active_property_restricts());
+ pending_property_restricts_.PopRestricts();
+ }
+ if (!processing_not_) {
+ std::move(
+ query_result.query_term_iterators.begin(),
+ query_result.query_term_iterators.end(),
+ std::inserter(query_term_iterators_, query_term_iterators_.end()));
+
+ std::move(query_result.query_terms.begin(), query_result.query_terms.end(),
+ std::inserter(property_query_terms_map_,
+ property_query_terms_map_.end()));
+ }
+ std::move(query_result.features_in_use.begin(),
+ query_result.features_in_use.end(),
+ std::inserter(features_, features_.end()));
+ return PendingValue(std::move(iterator));
}
libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
- if (pending_values_.empty() ||
- pending_values_.top().data_type() != PendingValue::DataType::kText) {
+ if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
}
- const std::string& value = pending_values_.top().term();
- char* value_end;
- int64_t int_value = std::strtoll(value.c_str(), &value_end, /*base=*/10);
- if (value_end != value.c_str() + value.length()) {
- return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Unable to parse \"", value, "\" as number."));
- }
+ ICING_ASSIGN_OR_RETURN(int64_t int_value, pending_values_.top().long_val());
pending_values_.pop();
return int_value;
}
libtextclassifier3::StatusOr<std::string>
QueryVisitor::PopPendingStringValue() {
- if (pending_values_.empty() ||
- pending_values_.top().data_type() != PendingValue::DataType::kString) {
- return absl_ports::InvalidArgumentError("Unable to retrieve text value.");
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve string value.");
}
- std::string string_value = std::move(pending_values_.top().term());
+ ICING_ASSIGN_OR_RETURN(std::string string_value,
+ std::move(pending_values_.top()).string_val());
pending_values_.pop();
return string_value;
}
libtextclassifier3::StatusOr<std::string> QueryVisitor::PopPendingTextValue() {
- if (pending_values_.empty() ||
- pending_values_.top().data_type() != PendingValue::DataType::kText) {
+ if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve text value.");
}
- std::string text_value = std::move(pending_values_.top().term());
+ ICING_ASSIGN_OR_RETURN(std::string text_value,
+ std::move(pending_values_.top()).text_val());
pending_values_.pop();
return text_value;
}
@@ -179,14 +333,12 @@ QueryVisitor::PopPendingIterator() {
if (pending_values_.empty() || pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError("Unable to retrieve iterator.");
}
- if (pending_values_.top().data_type() ==
- PendingValue::DataType::kDocIterator) {
+ if (pending_values_.top().data_type() == DataType::kDocumentIterator) {
std::unique_ptr<DocHitInfoIterator> iterator =
- std::move(pending_values_.top().iterator());
+ std::move(pending_values_.top()).iterator().ValueOrDie();
pending_values_.pop();
return iterator;
- } else if (pending_values_.top().data_type() ==
- PendingValue::DataType::kString) {
+ } else if (pending_values_.top().data_type() == DataType::kString) {
features_.insert(kVerbatimSearchFeature);
ICING_ASSIGN_OR_RETURN(std::string value, PopPendingStringValue());
return CreateTermIterator(std::move(value));
@@ -230,7 +382,7 @@ QueryVisitor::PopAllPendingIterators() {
return iterators;
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
+libtextclassifier3::StatusOr<PendingValue>
QueryVisitor::ProcessNumericComparator(const NaryOperatorNode* node) {
// 1. The children should have been processed and added their outputs to
// pending_values_. Time to process them.
@@ -253,49 +405,73 @@ QueryVisitor::ProcessNumericComparator(const NaryOperatorNode* node) {
return PendingValue(std::move(iterator));
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessAndOperator(const NaryOperatorNode* node) {
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessAndOperator(
+ const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
PopAllPendingIterators());
return PendingValue(CreateAndIterator(std::move(iterators)));
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessOrOperator(const NaryOperatorNode* node) {
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessOrOperator(
+ const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
PopAllPendingIterators());
return PendingValue(CreateOrIterator(std::move(iterators)));
}
-libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
-QueryVisitor::ProcessHasOperator(const NaryOperatorNode* node) {
- // The children should have been processed and added their outputs to
- // pending_values_. Time to process them.
- // The first two pending values should be the delegate and the property.
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
- PopPendingIterator());
+libtextclassifier3::Status QueryVisitor::ProcessHasOperator(
+ const NaryOperatorNode* node) {
+ if (node->children().size() != 2) {
+ return absl_ports::InvalidArgumentError("Expected 2 children.");
+ }
+
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit the first child - the property.
+ node->children().at(0)->Accept(this);
+ if (has_pending_error()) {
+ return pending_error_;
+ }
ICING_ASSIGN_OR_RETURN(std::string property, PopPendingTextValue());
- return PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
- std::move(delegate), &document_store_, &schema_store_,
- std::move(property)));
-}
+ pending_property_restricts_.AddValidRestricts({property});
+
+ // Just added a restrict - if there are no active property restricts, then
+ // that must be because this restrict is unsatisfiable.
+ if (pending_property_restricts_.active_property_restricts().empty()) {
+ // The property restrict can't be satisfiable. Pop the placeholder that was
+ // just added and push a FALSE iterator.
+ pending_property_restricts_.PopRestricts();
+ pending_values_.pop();
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
+ return libtextclassifier3::Status::OK;
+ }
-libtextclassifier3::StatusOr<std::string> QueryVisitor::GetPropertyRestrict()
- const {
- if (pending_property_restricts_.empty()) {
- return "";
+ // 3. Visit the second child - the argument.
+ node->children().at(1)->Accept(this);
+ if (has_pending_error()) {
+ return pending_error_;
}
- const std::string& restrict = pending_property_restricts_.at(0);
- bool valid_restrict = std::all_of(
- pending_property_restricts_.begin(), pending_property_restricts_.end(),
- [&restrict](const std::string& s) { return s == restrict; });
- if (!valid_restrict) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
+ PopPendingIterator());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError(
- "Invalid property restrict provided!");
+ "Error processing arguments for node.");
}
- return pending_property_restricts_.at(0);
+ pending_values_.pop();
+ pending_property_restricts_.PopRestricts();
+
+ std::set<std::string> property_restricts = {std::move(property)};
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(delegate), &document_store_, &schema_store_,
+ std::move(property_restricts))));
+ return libtextclassifier3::Status::OK;
}
void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
@@ -305,13 +481,14 @@ void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
void QueryVisitor::VisitString(const StringNode* node) {
// A STRING node can only be a term. Create the iterator now.
- auto escaped_string_or = EscapeStringValue(node->value());
- if (!escaped_string_or.ok()) {
- pending_error_ = std::move(escaped_string_or).status();
+ auto unescaped_string_or = UnescapeStringValue(node->value());
+ if (!unescaped_string_or.ok()) {
+ pending_error_ = std::move(unescaped_string_or).status();
return;
}
- pending_values_.push(PendingValue::CreateStringPendingValue(
- std::move(escaped_string_or).ValueOrDie()));
+ std::string unescaped_string = std::move(unescaped_string_or).ValueOrDie();
+ pending_values_.push(
+ PendingValue::CreateStringPendingValue(std::move(unescaped_string)));
}
void QueryVisitor::VisitText(const TextNode* node) {
@@ -323,6 +500,12 @@ void QueryVisitor::VisitText(const TextNode* node) {
}
void QueryVisitor::VisitMember(const MemberNode* node) {
+ if (node->children().empty()) {
+ pending_error_ =
+ absl_ports::InvalidArgumentError("Encountered malformed member node.");
+ return;
+ }
+
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
@@ -336,19 +519,25 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
// 3. The children should have been processed and added their outputs to
// pending_values_. Time to process them.
- std::string member = std::move(pending_values_.top().term());
- pending_values_.pop();
+ libtextclassifier3::StatusOr<std::string> member_or;
+ std::vector<std::string> members;
while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
- member = absl_ports::StrCat(pending_values_.top().term(),
- kPropertySeparator, member);
- pending_values_.pop();
+ member_or = PopPendingTextValue();
+ if (!member_or.ok()) {
+ pending_error_ = std::move(member_or).status();
+ return;
+ }
+ members.push_back(std::move(member_or).ValueOrDie());
}
+ std::string member =
+ absl_ports::StrJoin(members.rbegin(), members.rend(),
+ property_util::kPropertyPathSeparator);
// 4. If pending_values_ is empty somehow, then our placeholder disappeared
// somehow.
if (pending_values_.empty()) {
pending_error_ = absl_ports::InvalidArgumentError(
- "\"<\" operator must have two arguments.");
+ "Error processing arguments for member node.");
return;
}
pending_values_.pop();
@@ -357,10 +546,53 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
}
void QueryVisitor::VisitFunction(const FunctionNode* node) {
- pending_error_ = absl_ports::UnimplementedError(
- "Function node visiting not implemented yet.");
+ // 1. Get the associated function.
+ auto itr = registered_functions_.find(node->function_name()->value());
+ if (itr == registered_functions_.end()) {
+ pending_error_ = absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Function ", node->function_name()->value(), " is not supported."));
+ return;
+ }
+
+ // 2. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 3. Visit the children.
+ for (const std::unique_ptr<Node>& arg : node->args()) {
+ arg->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ }
+
+ // 4. Collect the arguments and evaluate the function.
+ std::vector<PendingValue> args;
+ while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
+ args.push_back(std::move(pending_values_.top()));
+ pending_values_.pop();
+ }
+ std::reverse(args.begin(), args.end());
+ const Function& function = itr->second;
+ auto eval_result = function.Eval(std::move(args));
+ if (!eval_result.ok()) {
+ pending_error_ = std::move(eval_result).status();
+ return;
+ }
+
+ // 5. Pop placeholder in pending_values and add the result of our function.
+ pending_values_.pop();
+ pending_values_.push(std::move(eval_result).ValueOrDie());
}
+// TODO(b/265312785) Clarify handling of the interaction between HAS and NOT.
+// Currently, `prop1:(NOT foo bar)` will not match any documents. Likewise,
+// `search("NOT foo bar", createList("prop1"))` will not match any documents.
+//
+// We should either confirm that this is the desired behavior or consider
+// rewriting these queries so that they're interpreted as
+// `NOT prop1:foo AND prop1:bar` and
+// `NOT search("foo", createList("prop1"))
+// AND search("bar", createList("prop1"))`
void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
if (node->operator_text() != "NOT") {
pending_error_ = absl_ports::UnimplementedError(
@@ -424,24 +656,23 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
return;
}
+ if (node->operator_text() == ":") {
+ libtextclassifier3::Status status = ProcessHasOperator(node);
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+ return;
+ }
+
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. Visit the children.
- bool processing_has = node->operator_text() == ":";
for (int i = 0; i < node->children().size(); ++i) {
node->children().at(i)->Accept(this);
if (has_pending_error()) {
return;
}
- if (processing_has && !processing_not_ && i == 0) {
- if (pending_values_.top().data_type() != PendingValue::DataType::kText) {
- pending_error_ = absl_ports::InvalidArgumentError(
- "Expected property before ':' operator.");
- return;
- }
- pending_property_restricts_.push_back(pending_values_.top().term());
- }
}
// 3. Retrieve the pending value for this node.
@@ -452,11 +683,6 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
pending_value_or = ProcessAndOperator(node);
} else if (node->operator_text() == "OR") {
pending_value_or = ProcessOrOperator(node);
- } else if (processing_has) {
- pending_value_or = ProcessHasOperator(node);
- if (!processing_not_) {
- pending_property_restricts_.pop_back();
- }
}
if (!pending_value_or.ok()) {
pending_error_ = std::move(pending_value_or).status();
diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h
index 414f1b9..b4e3dd7 100644
--- a/icing/query/advanced_query_parser/query-visitor.h
+++ b/icing/query/advanced_query_parser/query-visitor.h
@@ -20,12 +20,17 @@
#include <stack>
#include <string>
#include <unordered_set>
+#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/function.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
#include "icing/query/query-features.h"
#include "icing/query/query-results.h"
#include "icing/schema/schema-store.h"
@@ -46,16 +51,12 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
const Normalizer* normalizer, const Tokenizer* tokenizer,
DocHitInfoIteratorFilter::Options filter_options,
TermMatchType::Code match_type, bool needs_term_frequency_info)
- : index_(*index),
- numeric_index_(*numeric_index),
- document_store_(*document_store),
- schema_store_(*schema_store),
- normalizer_(*normalizer),
- tokenizer_(*tokenizer),
- filter_options_(std::move(filter_options)),
- match_type_(match_type),
- needs_term_frequency_info_(needs_term_frequency_info),
- processing_not_(false) {}
+ : QueryVisitor(index, numeric_index, document_store, schema_store,
+ normalizer, tokenizer, filter_options, match_type,
+ needs_term_frequency_info,
+
+ PendingPropertyRestricts(),
+ /*processing_not=*/false) {}
void VisitFunctionName(const FunctionNameNode* node) override;
void VisitString(const StringNode* node) override;
@@ -72,57 +73,56 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
libtextclassifier3::StatusOr<QueryResults> ConsumeResults() &&;
private:
- // A holder for intermediate results when processing child nodes.
- class PendingValue {
+ // An internal class to help manage property restricts being applied at
+ // different levels.
+ class PendingPropertyRestricts {
public:
- enum class DataType {
- kNone,
- // Values of type STRING will eventually be converted to a
- // DocHitInfoIterator further upstream.
- kString,
-
- // Values of type TEXT may be consumed as properties, numbers or converted
- // to DocHitInfoIterators further upstream.
- kText,
- kDocIterator,
- };
-
- static PendingValue CreateStringPendingValue(std::string str) {
- return PendingValue(std::move(str), DataType::kString);
+ // Add another set of property restricts. Elements of new_restricts that are
+ // not present in active_property_restricts will be ignored.
+ void AddValidRestricts(std::set<std::string> new_restricts);
+
+ // Pops the most recently added set of property restricts.
+ void PopRestricts() {
+ if (has_active_property_restricts()) {
+ pending_property_restricts_.pop_back();
+ }
}
- static PendingValue CreateTextPendingValue(std::string text) {
- return PendingValue(std::move(text), DataType::kText);
+ bool has_active_property_restricts() const {
+ return !pending_property_restricts_.empty();
}
- PendingValue() : data_type_(DataType::kNone) {}
-
- explicit PendingValue(std::unique_ptr<DocHitInfoIterator> iterator)
- : iterator_(std::move(iterator)), data_type_(DataType::kDocIterator) {}
-
- // Placeholder is used to indicate where the children of a particular node
- // begin.
- bool is_placeholder() const { return data_type_ == DataType::kNone; }
-
- DataType data_type() const { return data_type_; }
-
- std::unique_ptr<DocHitInfoIterator>& iterator() { return iterator_; }
- const std::unique_ptr<DocHitInfoIterator>& iterator() const {
- return iterator_;
+ // The set of all property restrictions that are currently being applied.
+ const std::set<std::string>& active_property_restricts() const {
+ return pending_property_restricts_.back();
}
- std::string& term() { return term_; }
- const std::string& term() const { return term_; }
-
private:
- explicit PendingValue(std::string term, DataType data_type)
- : term_(std::move(term)), data_type_(data_type) {}
-
- std::unique_ptr<DocHitInfoIterator> iterator_;
- std::string term_;
- DataType data_type_;
+ std::vector<std::set<std::string>> pending_property_restricts_;
};
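
For context on the comment above, a minimal sketch of how AddValidRestricts could narrow the active set, assuming intersection semantics (the class below is illustrative only, not the icing implementation; an empty intersection is what makes a query like `prop1:(prop2:foo)` match nothing, per ProcessHasOperator's documentation):

  #include <set>
  #include <string>
  #include <utility>
  #include <vector>

  class PendingPropertyRestrictsSketch {
   public:
    // Pushes the intersection of new_restricts with the currently active set
    // (or new_restricts as-is when nothing is active yet). An empty
    // intersection leaves an unsatisfiable restrict on the stack.
    void AddValidRestricts(std::set<std::string> new_restricts) {
      if (!pending_.empty()) {
        std::set<std::string> intersection;
        for (const std::string& restrict : new_restricts) {
          if (pending_.back().count(restrict) > 0) {
            intersection.insert(restrict);
          }
        }
        new_restricts = std::move(intersection);
      }
      pending_.push_back(std::move(new_restricts));
    }

    // Pops the most recently added set, if any.
    void PopRestricts() {
      if (!pending_.empty()) {
        pending_.pop_back();
      }
    }

    bool has_active_property_restricts() const { return !pending_.empty(); }

    const std::set<std::string>& active_property_restricts() const {
      return pending_.back();
    }

   private:
    std::vector<std::set<std::string>> pending_;
  };
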
+ explicit QueryVisitor(
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const Normalizer* normalizer, const Tokenizer* tokenizer,
+ DocHitInfoIteratorFilter::Options filter_options,
+ TermMatchType::Code match_type, bool needs_term_frequency_info,
+
+ PendingPropertyRestricts pending_property_restricts, bool processing_not)
+ : index_(*index),
+ numeric_index_(*numeric_index),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ normalizer_(*normalizer),
+ tokenizer_(*tokenizer),
+ filter_options_(std::move(filter_options)),
+ match_type_(match_type),
+ needs_term_frequency_info_(needs_term_frequency_info),
+ pending_property_restricts_(std::move(pending_property_restricts)),
+ processing_not_(processing_not) {
+ RegisterFunctions();
+ }
+
bool has_pending_error() const { return !pending_error_.ok(); }
// Creates a DocHitInfoIterator reflecting the provided term. Also populates,
@@ -204,27 +204,38 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
libtextclassifier3::StatusOr<PendingValue> ProcessOrOperator(
const NaryOperatorNode* node);
- // Processes the HAS operator represented by the node. This must be called
- // *after* this node's children have been visited. The PendingValues added by
- // this node's children will be consumed by this function and the PendingValue
- // for this node will be returned.
- // Returns:
- // - On success, then PendingValue representing this node and it's children.
- // - INVALID_ARGUMENT if unable to properly retrieve an iterator
- // representing the second child
- libtextclassifier3::StatusOr<PendingValue> ProcessHasOperator(
- const NaryOperatorNode* node);
+ // Populates registered_functions with the currently supported set of
+ // functions.
+ void RegisterFunctions();
- // RETURNS:
- // - the current property restrict or empty string if there is no property
- // restrict.
- // - INVALID_ARGUMENT if the current restrict is invalid (ie is a chain of
- // restricts with different properties such as `subject:(body:foo)`).
- libtextclassifier3::StatusOr<std::string> GetPropertyRestrict() const;
+ // Implementation of `search` custom function in the query language.
+ // Returns:
+ // - a PendingValue holding the DocHitInfoIterator reflecting the query
+ // provided to SearchFunction
+ // - any errors returned by Lexer::ExtractTokens, Parser::ConsumeQuery or
+ // QueryVisitor::ConsumeResults.
+ libtextclassifier3::StatusOr<PendingValue> SearchFunction(
+ std::vector<PendingValue>&& args);
+
+ // Handles a NaryOperatorNode where the operator is HAS (':') and pushes an
+ // iterator with the proper section filter applied. If the current property
+ // restriction (represented by pending_property_restricts) combined with the
+ // first child of this node is unsatisfiable (e.g. `prop1:(prop2:foo)`), then a
+ // NONE iterator is returned immediately and the subtree represented by the
+ // second child is not traversed.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT if the node does not have exactly two children or the two
+ // children cannot be resolved to a MEMBER or an iterator respectively.
+ libtextclassifier3::Status ProcessHasOperator(const NaryOperatorNode* node);
std::stack<PendingValue> pending_values_;
libtextclassifier3::Status pending_error_;
+ // A map from function name to Function instance.
+ std::unordered_map<std::string, Function> registered_functions_;
+
SectionRestrictQueryTermsMap property_query_terms_map_;
QueryTermIteratorsMap query_term_iterators_;
@@ -246,7 +257,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
bool needs_term_frequency_info_;
// The stack of property restricts currently being processed by the visitor.
- std::vector<std::string> pending_property_restricts_;
+ PendingPropertyRestricts pending_property_restricts_;
bool processing_not_;
};
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index 1577a3f..12a6631 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -75,7 +75,12 @@ std::vector<T> ExtractKeys(const std::unordered_map<T, U>& map) {
return keys;
}
-class QueryVisitorTest : public ::testing::Test {
+enum class QueryType {
+ kPlain,
+ kSearch,
+};
+
+class QueryVisitorTest : public ::testing::TestWithParam<QueryType> {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/icing";
@@ -145,6 +150,42 @@ class QueryVisitorTest : public ::testing::Test {
return parser.ConsumeQuery();
}
+ std::string EscapeString(std::string_view str) {
+ std::string result;
+ result.reserve(str.size());
+ for (char c : str) {
+ if (c == '\\' || c == '"') {
+ result.push_back('\\');
+ }
+ result.push_back(c);
+ }
+ return result;
+ }
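
The escaping above matters once a query containing quotes or backslashes is embedded inside the search("...") string literal (see the VerbatimTerm* cases below). A standalone sketch of the same rule, with expected outputs noted in comments (the helper name is local to this sketch):

  #include <iostream>
  #include <string>
  #include <string_view>

  // Same rule as EscapeString above: prefix every '\' and '"' with a '\'.
  std::string Escape(std::string_view str) {
    std::string result;
    result.reserve(str.size());
    for (char c : str) {
      if (c == '\\' || c == '"') {
        result.push_back('\\');
      }
      result.push_back(c);
    }
    return result;
  }

  int main() {
    // `foo bar`    -> `foo bar`          (nothing to escape)
    std::cout << Escape("foo bar") << "\n";
    // `"foobar\""` -> `\"foobar\\\"\"`   (every quote and backslash escaped)
    std::cout << Escape(R"("foobar\"")") << "\n";
    return 0;
  }
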
+
+ std::string CreateQuery(std::string query,
+ std::string property_restrict = "") {
+ switch (GetParam()) {
+ case QueryType::kPlain:
+ if (property_restrict.empty()) {
+ // CreateQuery("foo bar") returns `foo bar`
+ return query;
+ }
+ // CreateQuery("foo", "subject") returns `subject:foo`
+ return absl_ports::StrCat(property_restrict, ":", query);
+ case QueryType::kSearch:
+ query = EscapeString(query);
+ property_restrict = EscapeString(property_restrict);
+ if (property_restrict.empty()) {
+ // CreateQuery("foo bar") returns `search("foo bar")`
+ return absl_ports::StrCat("search(\"", query, "\")");
+ }
+ // CreateQuery("foo", "subject") returns
+ // `search("foo", createList("subject"))`
+ return absl_ports::StrCat("search(\"", query, "\", createList(\"",
+ property_restrict, "\"))");
+ }
+ }
+
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
std::string test_dir_;
@@ -163,23 +204,23 @@ class QueryVisitorTest : public ::testing::Test {
std::unique_ptr<const JniCache> jni_cache_;
};
-TEST_F(QueryVisitorTest, SimpleLessThan) {
+TEST_P(QueryVisitorTest, SimpleLessThan) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price < 2";
+ std::string query = CreateQuery("price < 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -200,23 +241,23 @@ TEST_F(QueryVisitorTest, SimpleLessThan) {
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, SimpleLessThanEq) {
+TEST_P(QueryVisitorTest, SimpleLessThanEq) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price <= 1";
+ std::string query = CreateQuery("price <= 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -237,23 +278,23 @@ TEST_F(QueryVisitorTest, SimpleLessThanEq) {
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, SimpleEqual) {
+TEST_P(QueryVisitorTest, SimpleEqual) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price == 2";
+ std::string query = CreateQuery("price == 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -274,23 +315,23 @@ TEST_F(QueryVisitorTest, SimpleEqual) {
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, SimpleGreaterThanEq) {
+TEST_P(QueryVisitorTest, SimpleGreaterThanEq) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price >= 1";
+ std::string query = CreateQuery("price >= 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -311,23 +352,23 @@ TEST_F(QueryVisitorTest, SimpleGreaterThanEq) {
ElementsAre(kDocumentId2, kDocumentId1));
}
-TEST_F(QueryVisitorTest, SimpleGreaterThan) {
+TEST_P(QueryVisitorTest, SimpleGreaterThan) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price > 1";
+ std::string query = CreateQuery("price > 1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -349,24 +390,24 @@ TEST_F(QueryVisitorTest, SimpleGreaterThan) {
}
// TODO(b/208654892) Properly handle negative numbers in query expressions.
-TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanEqual) {
+TEST_P(QueryVisitorTest, DISABLED_IntMinLessThanEqual) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MIN + 1 respectively.
int64_t int_min = std::numeric_limits<int64_t>::min();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(int_min);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(std::numeric_limits<int64_t>::max());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_min + 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price <= " + std::to_string(int_min);
+ std::string query = CreateQuery("price <= " + std::to_string(int_min));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -387,24 +428,24 @@ TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanEqual) {
ElementsAre(kDocumentId0));
}
-TEST_F(QueryVisitorTest, IntMaxGreaterThanEqual) {
+TEST_P(QueryVisitorTest, IntMaxGreaterThanEqual) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MAX - 1 respectively.
int64_t int_max = std::numeric_limits<int64_t>::max();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(std::numeric_limits<int64_t>::min());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(int_max);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_max - 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price >= " + std::to_string(int_max);
+ std::string query = CreateQuery("price >= " + std::to_string(int_max));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -425,25 +466,25 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanEqual) {
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, NestedPropertyLessThan) {
+TEST_P(QueryVisitorTest, NestedPropertyLessThan) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "subscription.price < 2";
+ std::string query = CreateQuery("subscription.price < 2");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -464,8 +505,8 @@ TEST_F(QueryVisitorTest, NestedPropertyLessThan) {
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, IntParsingError) {
- std::string query = "subscription.price < fruit";
+TEST_P(QueryVisitorTest, IntParsingError) {
+ std::string query = CreateQuery("subscription.price < fruit");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -478,8 +519,8 @@ TEST_F(QueryVisitorTest, IntParsingError) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, NotEqualsUnsupported) {
- std::string query = "subscription.price != 3";
+TEST_P(QueryVisitorTest, NotEqualsUnsupported) {
+ std::string query = CreateQuery("subscription.price != 3");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -492,23 +533,23 @@ TEST_F(QueryVisitorTest, NotEqualsUnsupported) {
StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
}
-TEST_F(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
+TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
// Create an invalid AST for the query '3 < subscription.price 25' where '<'
// has three operands
@@ -537,7 +578,7 @@ TEST_F(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
+TEST_P(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
// Create an invalid AST for the query 'subscription.price <' where '<'
// has a single operand
auto property_node = std::make_unique<TextNode>("subscription");
@@ -561,27 +602,25 @@ TEST_F(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
+TEST_P(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
// Setup the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
// respectively.
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
editor->BufferKey(0);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
editor->BufferKey(1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor =
numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
editor->BufferKey(2);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- // Create an invalid AST for the query 'time < 25' where '<'
- // has three operands
- std::string query = "time < 25";
+ std::string query = CreateQuery("time < 25");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -590,11 +629,16 @@ TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
- EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ ElementsAre(kNumericSearchFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
}
-TEST_F(QueryVisitorTest, NeverVisitedReturnsInvalid) {
+TEST_P(QueryVisitorTest, NeverVisitedReturnsInvalid) {
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
schema_store_.get(), normalizer_.get(), tokenizer_.get(),
@@ -605,24 +649,24 @@ TEST_F(QueryVisitorTest, NeverVisitedReturnsInvalid) {
}
// TODO(b/208654892) Properly handle negative numbers in query expressions.
-TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanInvalid) {
+TEST_P(QueryVisitorTest, DISABLED_IntMinLessThanInvalid) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MIN + 1 respectively.
int64_t int_min = std::numeric_limits<int64_t>::min();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(int_min);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(std::numeric_limits<int64_t>::max());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_min + 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price <" + std::to_string(int_min);
+ std::string query = CreateQuery("price <" + std::to_string(int_min));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -635,24 +679,24 @@ TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) {
+TEST_P(QueryVisitorTest, IntMaxGreaterThanInvalid) {
// Setup the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
// INT_MAX and INT_MAX - 1 respectively.
int64_t int_max = std::numeric_limits<int64_t>::max();
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit("price", kDocumentId0, kSectionId0);
editor->BufferKey(std::numeric_limits<int64_t>::min());
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
editor->BufferKey(int_max);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
editor->BufferKey(int_max - 1);
- editor->IndexAllBufferedKeys();
+ std::move(*editor).IndexAllBufferedKeys();
- std::string query = "price >" + std::to_string(int_max);
+ std::string query = CreateQuery("price >" + std::to_string(int_max));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -665,9 +709,9 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
+TEST_P(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
// "price" is a STRING token, which cannot be a property name.
- std::string query = R"("price" > 7)";
+ std::string query = CreateQuery(R"("price" > 7)");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -680,7 +724,7 @@ TEST_F(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
+TEST_P(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -698,7 +742,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo";
+ std::string query = CreateQuery("foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -732,7 +776,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST_F(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
+TEST_P(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -750,7 +794,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo";
+ std::string query = CreateQuery("foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -783,7 +827,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
+TEST_P(QueryVisitorTest, SingleVerbatimTerm) {
// Setup the index with docs 0, 1 and 2 holding the values "foo:bar(baz)",
// "foo:bar(baz)" and "bar:baz(foo)" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -801,7 +845,7 @@ TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
editor.BufferTerm("bar:baz(foo)");
editor.IndexAllBufferedTerms();
- std::string query = "\"foo:bar(baz)\"";
+ std::string query = CreateQuery("\"foo:bar(baz)\"");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -834,7 +878,7 @@ TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
// verbatim term?
// Example: verbatim_term = `foobar"`
// Answer: quote char must be escaped. verbatim_query = `foobar\"`
-TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingQuote) {
// Setup the index with docs 0, 1 and 2 holding the values "foobary",
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -854,7 +898,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
// From the comment above, verbatim_term = `foobar"` and verbatim_query =
// `foobar\"`
- std::string query = R"("foobar\"")";
+ std::string query = CreateQuery(R"(("foobar\""))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -880,7 +924,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
// end of the verbatim term
// Example: verbatim_term = `foobar\`
// Answer: escape chars can be escaped. verbatim_query = `foobar\\`
-TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingEscape) {
// Setup the index with docs 0, 1 and 2 holding the values "foobary",
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -900,7 +944,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foobar\`.
- std::string query = R"("foobar\\")";
+ std::string query = CreateQuery(R"(("foobar\\"))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -928,7 +972,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
// in, consume the escape char and add the char like we do for the
// quote char). So the above query would match the verbatim_term
// `foobary`.
-TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
// Setup the index with docs 0, 1 and 2 holding the values "foobary",
// "foobar\" and "foobar"" respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -948,7 +992,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foobary`.
- std::string query = R"("foobar\y")";
+ std::string query = CreateQuery(R"(("foobar\y"))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -970,7 +1014,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
ElementsAre(kDocumentId0));
// Issue a query for the verbatim token `foobar\y`.
- query = R"("foobar\\y")";
+ query = CreateQuery(R"(("foobar\\y"))");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -997,7 +1041,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
// is its own separate ascii value. For a query `foobar\n`, the parser will see
// the character sequence [`f`, `o`, `o`, `b`, `a`, `r`, `\n`] - it *won't* ever
// see `\` and `n`.
-TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
+TEST_P(QueryVisitorTest, VerbatimTermNewLine) {
// Setup the index with docs 0, 1 and 2 holding the values "foobar\n",
// `foobar\` and `foobar\n` respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -1018,7 +1062,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foobar` + '\n'.
- std::string query = "\"foobar\n\"";
+ std::string query = CreateQuery("\"foobar\n\"");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1039,7 +1083,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
ElementsAre(kDocumentId0));
// Now, issue a query for the verbatim token `foobar\n`.
- query = R"("foobar\\n")";
+ query = CreateQuery(R"(("foobar\\n"))");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -1060,7 +1104,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) {
+TEST_P(QueryVisitorTest, VerbatimTermEscapingComplex) {
// Setup the index with docs 0, 1 and 2 holding the values `foo\"bar\nbaz"`,
// `foo\\\"bar\\nbaz\"` and `foo\\"bar\\nbaz"` respectively.
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -1081,7 +1125,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) {
editor.IndexAllBufferedTerms();
// Issue a query for the verbatim token `foo\"bar\nbaz"`.
- std::string query = R"("foo\\\"bar\\nbaz\"")";
+ std::string query = CreateQuery(R"(("foo\\\"bar\\nbaz\""))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1103,7 +1147,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) {
ElementsAre(kDocumentId0));
}
-TEST_F(QueryVisitorTest, SingleMinusTerm) {
+TEST_P(QueryVisitorTest, SingleMinusTerm) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
ICING_ASSERT_OK(schema_store_->SetSchema(
@@ -1132,7 +1176,7 @@ TEST_F(QueryVisitorTest, SingleMinusTerm) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "-foo";
+ std::string query = CreateQuery("-foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1150,7 +1194,7 @@ TEST_F(QueryVisitorTest, SingleMinusTerm) {
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, SingleNotTerm) {
+TEST_P(QueryVisitorTest, SingleNotTerm) {
// Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
// "bar" respectively.
ICING_ASSERT_OK(schema_store_->SetSchema(
@@ -1179,7 +1223,7 @@ TEST_F(QueryVisitorTest, SingleNotTerm) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "NOT foo";
+ std::string query = CreateQuery("NOT foo");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1197,7 +1241,7 @@ TEST_F(QueryVisitorTest, SingleNotTerm) {
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, NestedNotTerms) {
+TEST_P(QueryVisitorTest, NestedNotTerms) {
// Setup the index with docs 0, 1 and 2 holding the values
// ["foo", "bar", "baz"], ["foo", "baz"] and ["bar", "baz"] respectively.
ICING_ASSERT_OK(schema_store_->SetSchema(
@@ -1231,7 +1275,7 @@ TEST_F(QueryVisitorTest, NestedNotTerms) {
editor.IndexAllBufferedTerms();
// Double negative could be rewritten as `(foo AND NOT bar) baz`
- std::string query = "NOT (-foo OR bar) baz";
+ std::string query = CreateQuery("NOT (-foo OR bar) baz");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1252,7 +1296,7 @@ TEST_F(QueryVisitorTest, NestedNotTerms) {
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, DeeplyNestedNotTerms) {
+TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
// Setup the index with docs 0, 1 and 2 holding the values
// ["foo", "bar", "baz"], ["foo", "baz"] and ["bar", "baz"] respectively.
ICING_ASSERT_OK(schema_store_->SetSchema(
@@ -1297,7 +1341,7 @@ TEST_F(QueryVisitorTest, DeeplyNestedNotTerms) {
// ((FALSE OR FALSE) TRUE) TRUE -> ((FALSE) TRUE) TRUE -> FALSE
// Doc 2 : (((-FALSE OR TRUE) TRUE) OR FALSE) NOT FALSE
// ((TRUE OR TRUE) TRUE) TRUE -> ((TRUE) TRUE) TRUE -> TRUE
- std::string query = "NOT (-(NOT (foo -bar) baz) -bat) NOT bass";
+ std::string query = CreateQuery("NOT (-(NOT (foo -bar) baz) -bat) NOT bass");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1318,7 +1362,7 @@ TEST_F(QueryVisitorTest, DeeplyNestedNotTerms) {
ElementsAre(kDocumentId2, kDocumentId0));
}
-TEST_F(QueryVisitorTest, ImplicitAndTerms) {
+TEST_P(QueryVisitorTest, ImplicitAndTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("foo");
@@ -1335,7 +1379,7 @@ TEST_F(QueryVisitorTest, ImplicitAndTerms) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo bar";
+ std::string query = CreateQuery("foo bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1356,7 +1400,7 @@ TEST_F(QueryVisitorTest, ImplicitAndTerms) {
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, ExplicitAndTerms) {
+TEST_P(QueryVisitorTest, ExplicitAndTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("foo");
@@ -1373,7 +1417,7 @@ TEST_F(QueryVisitorTest, ExplicitAndTerms) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo AND bar";
+ std::string query = CreateQuery("foo AND bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1394,7 +1438,7 @@ TEST_F(QueryVisitorTest, ExplicitAndTerms) {
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, OrTerms) {
+TEST_P(QueryVisitorTest, OrTerms) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("foo");
@@ -1411,7 +1455,7 @@ TEST_F(QueryVisitorTest, OrTerms) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = "foo OR bar";
+ std::string query = CreateQuery("foo OR bar");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1432,7 +1476,7 @@ TEST_F(QueryVisitorTest, OrTerms) {
ElementsAre(kDocumentId2, kDocumentId0));
}
-TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
+TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
TERM_MATCH_PREFIX, /*namespace_id=*/0);
editor.BufferTerm("bar");
@@ -1451,7 +1495,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
editor.IndexAllBufferedTerms();
// Should be interpreted like `foo (bar OR baz)`
- std::string query = "foo bar OR baz";
+ std::string query = CreateQuery("foo bar OR baz");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1472,7 +1516,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
ElementsAre(kDocumentId2, kDocumentId1));
// Should be interpreted like `(bar OR baz) foo`
- query = "bar OR baz foo";
+ query = CreateQuery("bar OR baz foo");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -1491,7 +1535,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId1));
- query = "(bar OR baz) foo";
+ query = CreateQuery("(bar OR baz) foo");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_three(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -1511,7 +1555,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
ElementsAre(kDocumentId2, kDocumentId1));
}
-TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
+TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type").AddProperty(
@@ -1545,7 +1589,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
editor.IndexAllBufferedTerms();
// Should be interpreted like `foo ((NOT bar) OR baz)`
- std::string query = "foo NOT bar OR baz";
+ std::string query = CreateQuery("foo NOT bar OR baz");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1565,7 +1609,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2, kDocumentId0));
- query = "foo NOT (bar OR baz)";
+ query = CreateQuery("foo NOT (bar OR baz)");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -1584,7 +1628,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
ElementsAre(kDocumentId0));
}
-TEST_F(QueryVisitorTest, PropertyFilter) {
+TEST_P(QueryVisitorTest, PropertyFilter) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1625,7 +1669,7 @@ TEST_F(QueryVisitorTest, PropertyFilter) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "prop1:foo";
+ std::string query = CreateQuery("foo", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1646,7 +1690,76 @@ TEST_F(QueryVisitorTest, PropertyFilter) {
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, PropertyFilterStringIsInvalid) {
+TEST_F(QueryVisitorTest, MultiPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+ SectionId prop3_section_id = 2;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop3_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = R"(search("foo", createList("prop1", "prop2")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1", "prop2"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterStringIsInvalid) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1664,7 +1777,7 @@ TEST_F(QueryVisitorTest, PropertyFilterStringIsInvalid) {
.Build()));
// "prop1" is a STRING token, which cannot be a property name.
- std::string query = R"("prop1":foo)";
+ std::string query = CreateQuery(R"(("prop1":foo))");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1677,7 +1790,7 @@ TEST_F(QueryVisitorTest, PropertyFilterStringIsInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, PropertyFilterNonNormalized) {
+TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1718,7 +1831,7 @@ TEST_F(QueryVisitorTest, PropertyFilterNonNormalized) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "PROP1:foo";
+ std::string query = CreateQuery("foo", /*property_restrict=*/"PROP1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1739,7 +1852,7 @@ TEST_F(QueryVisitorTest, PropertyFilterNonNormalized) {
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
+TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1780,7 +1893,8 @@ TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "prop1:(foo OR bar)";
+ std::string query =
+ CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1802,7 +1916,7 @@ TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
ElementsAre(kDocumentId1, kDocumentId0));
}
-TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) {
+TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1843,7 +1957,7 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "prop1:(prop1:foo)";
+ std::string query = CreateQuery("(prop1:foo)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1863,7 +1977,8 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) {
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId1));
- query = "prop1:(prop1:(prop1:(prop1:(prop1:foo))))";
+ query = CreateQuery("(prop1:(prop1:(prop1:(prop1:foo))))",
+ /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -1883,7 +1998,7 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) {
ElementsAre(kDocumentId1));
}
-TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) {
+TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1924,7 +2039,7 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "prop1:(prop2:foo)";
+ std::string query = CreateQuery("(prop2:foo)", /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -1937,11 +2052,15 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) {
std::move(query_visitor).ConsumeResults());
EXPECT_THAT(query_results.features_in_use, IsEmpty());
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
- EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
- UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
- query = "prop1:(prop2:(prop1:(prop2:(prop1:foo))))";
+ // Resulting queries:
+ // - kPlain: `prop1:(prop2:(prop1:(prop2:(prop1:foo))))`
+ // - kSearch: `search("(prop2:(prop1:(prop2:(prop1:foo))))",
+ // createList("prop1"))`
+ query = CreateQuery("(prop2:(prop1:(prop2:(prop1:foo))))",
+ /*property_restrict=*/"prop1");
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -1953,12 +2072,11 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) {
std::move(query_visitor_two).ConsumeResults());
EXPECT_THAT(query_results.features_in_use, IsEmpty());
EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
- EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
- UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
}
-TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
+TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1999,7 +2117,11 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
- std::string query = "-prop1:(foo OR bar)";
+ // Resulting queries:
+ // - kPlain: `-prop1:(foo OR bar)`
+ // - kSearch: `-search("(foo OR bar)", createList("prop1"))`
+ std::string query = absl_ports::StrCat(
+ "-", CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1"));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -2016,7 +2138,11 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
ElementsAre(kDocumentId2));
- query = "NOT prop1:(foo OR bar)";
+ // Resulting queries:
+ // - kPlain: `NOT prop1:(foo OR bar)`
+ // - kSearch: `NOT search("(foo OR bar)", createList("prop1"))`
+ query = absl_ports::StrCat(
+ "NOT ", CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1"));
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
@@ -2033,7 +2159,96 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
ElementsAre(kDocumentId2));
}
-TEST_F(QueryVisitorTest, SegmentationTest) {
+TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Resulting queries:
+ // - kPlain: `prop1:(-foo OR bar)`
+ // - kSearch: `search("(-foo OR bar)", createList("prop1"))`
+ std::string query =
+ CreateQuery("(-foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+
+ // Resulting queries:
+ // - kPlain: `prop1:(NOT foo OR bar)`
+ // - kSearch: `search("(NOT foo OR bar)", createList("prop1"))`
+ query = CreateQuery("(NOT foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SegmentationTest) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -2055,7 +2270,7 @@ TEST_F(QueryVisitorTest, SegmentationTest) {
// ICU segmentation will break this into "每天" and "上班".
// CFStringTokenizer (ios) will break this into "每", "天" and "上班"
- std::string query = "每天上班";
+ std::string query = CreateQuery("每天上班");
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
@@ -2118,6 +2333,795 @@ TEST_F(QueryVisitorTest, SegmentationTest) {
ElementsAre(kDocumentId0));
}
+TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
+ .Build()));
+
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ NamespaceId ns_id = 0;
+
+ // Create the following docs:
+ // - Doc 0: Contains 'val0', 'val1', 'val2' in 'prop0'. Shouldn't match.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri1").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri2").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1' etc. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri3").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
+ editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
+ // Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri4").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
+ editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+
+ // Now issue a query with 'val1' restricted to 'prop1'. This should match only
+ // docs 1 and 3.
+ // Resulting queries:
+ // - kPlain: `val0 prop1:val1 val2`
+ // - kSearch: `val0 search("val1", createList("prop1")) val2`
+ std::string query = absl_ports::StrCat(
+ "val0 ", CreateQuery("val1", /*property_restrict=*/"prop1"), " val2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("", "prop1"));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("val0", "val2"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("val1"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("val0", "val1", "val2"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid3, docid1));
+}
+
+TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
+ .Build()));
+
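+ // Section ids are assigned alphabetically.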
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ NamespaceId ns_id = 0;
+
+ // Create the following docs:
+ // - Doc 0: Contains 'val0', 'val1', 'val2' in 'prop0'. Should match.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri1").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri2").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.BufferTerm("val1");
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1' etc. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri3").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
+ editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+
+ // - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
+ // Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri4").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
+ editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val1");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val2");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(docid4, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("val0");
+ editor.IndexAllBufferedTerms();
+
+ // Now issue a query where 'val1' is restricted to 'prop2' nested inside a
+ // 'prop1' restrict. That nested restrict is unsatisfiable, so only the
+ // 'prop0:val0' and 'prop2:val2' branches can match. This should match docs
+ // 0, 2 and 3.
+ // Resulting queries:
+ // - kPlain: `prop0:val0 OR prop1:(prop2:val1) OR prop2:val2`
+ // - kSearch: `prop0:val0 OR search("(prop2:val1)", createList("prop1")) OR
+ // prop2:val2`
+ std::string query = absl_ports::StrCat(
+ "prop0:val0 OR prop1:(",
+ CreateQuery("val1", /*property_restrict=*/"prop2"), ") OR prop2:val2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop2"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("val0"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("val2"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("val0", "val2"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid3, docid2, docid0));
+}
+
+TEST_F(QueryVisitorTest, UnsupportedFunctionReturnsInvalidArgument) {
+ std::string query = "unsupportedFunction()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionTooFewArgumentsReturnsInvalidArgument) {
+ std::string query = "search()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionTooManyArgumentsReturnsInvalidArgument) {
+ std::string query = R"(search("foo", createList("subject"), "bar"))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionWrongFirstArgumentTypeReturnsInvalidArgument) {
+ // First argument type=TEXT, expected STRING.
+ std::string query = "search(7)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // First argument type=string list, expected STRING.
+ query = R"(search(createList("subject")))";
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_two);
+ EXPECT_THAT(std::move(query_visitor_two).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionWrongSecondArgumentTypeReturnsInvalidArgument) {
+ // Second argument type=STRING, expected string list.
+ std::string query = R"(search("foo", "bar"))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Second argument type=TEXT, expected string list.
+ query = R"(search("foo", 7))";
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_two);
+ EXPECT_THAT(std::move(query_visitor_two).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionCreateListZeroPropertiesReturnsInvalidArgument) {
+ std::string query = R"(search("foo", createList()))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.BufferTerm("bar");
+ editor.IndexAllBufferedTerms();
+
+ // The query string passed to `search` is parsed as a full query of its own,
+ // so it may itself contain a `search` call. Each outer level embeds the
+ // escaped inner query, and every level should evaluate the same as
+ // `foo bar` restricted to 'prop1', matching only the doc with both terms.
+ std::string level_one_query = R"(search("foo", createList("prop1")) bar)";
+ std::string level_two_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
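+ // Nest one level deeper; the results should be unchanged.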
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
+ std::string level_four_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_three_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_four_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+// This test will nest `search` calls together with the set of restricts
+// narrowing at each level so that the set of docs matching the query shrinks.
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ SectionId prop3_id = 3;
+ SectionId prop4_id = 4;
+ SectionId prop5_id = 5;
+ SectionId prop6_id = 6;
+ SectionId prop7_id = 7;
+
+ NamespaceId ns_id = 0;
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid1,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid2,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid3,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid4,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid5,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid6,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid7,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Start with a `search` call whose restrict list names all eight properties.
+ // Every doc contains "foo" in one of those properties, so all eight docs
+ // should match at this level.
+ std::string level_one_query =
+ R"(search("foo", createList("prop2", "prop5", "prop1", "prop3", "prop0", "prop6", "prop4", "prop7")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_one_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop1", "prop2", "prop3", "prop4",
+ "prop5", "prop6", "prop7"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop3"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop4"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop5"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop7"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid7, docid6, docid5, docid4, docid3, docid2,
+ docid1, docid0));
+
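+ // Wrap the previous query in another `search` call that restricts to only
+ // four of the properties. The effective restrict set is the intersection,
+ // so only docs 0, 2, 4 and 6 should match.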
+ std::string level_two_query = absl_ports::StrCat(
+ R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop6", "prop0", "prop4", "prop2")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop2", "prop4", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop4"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid4, docid2, docid0));
+
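+ // Narrow the restricts once more to 'prop0' and 'prop6'; only docs 0 and 6
+ // should match.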
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop0", "prop6")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+}
+
+// This test will nest `search` calls together with the set of restricts
+// expanding at each level. The set of docs matching the query stays the same,
+// because the effective restricts never grow beyond the innermost set.
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ SectionId prop3_id = 3;
+ SectionId prop4_id = 4;
+ SectionId prop5_id = 5;
+ SectionId prop6_id = 6;
+ SectionId prop7_id = 7;
+
+ NamespaceId ns_id = 0;
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid1,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid2,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid3,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid4,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid5,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid6,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid7,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Start with a `search` call restricted to 'prop0' and 'prop6'. Only docs 0
+ // and 6 contain "foo" in those properties, so only they should match.
+ std::string level_one_query =
+ R"(search("foo", createList("prop0", "prop6")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_one_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+
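+ // Wrap the previous query in a `search` call with a wider restrict set. The
+ // effective restricts stay limited to the inner 'prop0' and 'prop6', so the
+ // results should not change.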
+ std::string level_two_query = absl_ports::StrCat(
+ R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop6", "prop0", "prop4", "prop2")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+
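+ // Widen the outer restrict set to all eight properties; the results should
+ // still be limited to docs 0 and 6.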
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop2", "prop5", "prop1", "prop3",)",
+ R"( "prop0", "prop6", "prop4", "prop7")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+}
+
+INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest,
+ testing::Values(QueryType::kPlain,
+ QueryType::kSearch));
+
} // namespace
} // namespace lib
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 47f109c..9b03a0e 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -39,8 +39,8 @@
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/advanced_query_parser/query-visitor.h"
-#include "icing/query/query-processor.h"
#include "icing/query/query-features.h"
+#include "icing/query/query-processor.h"
#include "icing/query/query-results.h"
#include "icing/query/query-terms.h"
#include "icing/query/query-utils.h"
@@ -230,7 +230,6 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
std::stack<ParserStateFrame> frames;
frames.emplace();
-
QueryResults results;
// Process all the tokens
for (int i = 0; i < tokens.size(); i++) {
@@ -309,11 +308,12 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
// We do the same amount of disk reads, so it may be dependent on how
// big the schema is and/or how popular schema type filtering and
// section filtering is.
-
ICING_ASSIGN_OR_RETURN(
result_iterator,
index_.GetIterator(
- normalized_text, kSectionIdMaskAll,
+ normalized_text,
+ token.text.data() - search_spec.query().c_str(),
+ token.text.length(), kSectionIdMaskAll,
search_spec.term_match_type(),
/*need_hit_term_frequency=*/ranking_strategy ==
ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
@@ -329,7 +329,9 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> term_iterator,
index_.GetIterator(
- normalized_text, kSectionIdMaskAll,
+ normalized_text,
+ token.text.data() - search_spec.query().c_str(),
+ token.text.length(), kSectionIdMaskAll,
search_spec.term_match_type(),
/*need_hit_term_frequency=*/ranking_strategy ==
ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
@@ -344,6 +346,7 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
break;
}
case Token::Type::INVALID:
+ ICING_LOG(ERROR) << "INVALID";
[[fallthrough]];
default:
// This wouldn't happen if tokenizer and query processor both work
@@ -390,9 +393,11 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
if (!frames.top().section_restrict.empty()) {
// We saw a section restrict earlier, wrap the result iterator in
// the section restrict
+ std::set<std::string> section_restricts;
+ section_restricts.insert(std::move(frames.top().section_restrict));
result_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
std::move(result_iterator), &document_store_, &schema_store_,
- std::move(frames.top().section_restrict));
+ std::move(section_restricts));
frames.top().section_restrict = "";
}
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 7842a9a..cbf859b 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -141,7 +141,7 @@ class QueryProcessorTest
std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
numeric_index_->Edit(property, document_id, section_id);
ICING_RETURN_IF_ERROR(editor->BufferKey(value));
- return editor->IndexAllBufferedKeys();
+ return std::move(*editor).IndexAllBufferedKeys();
}
void TearDown() override {
diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc
index af84d1c..3626ae3 100644
--- a/icing/query/suggestion-processor.cc
+++ b/icing/query/suggestion-processor.cc
@@ -16,6 +16,9 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/query/query-processor.h"
+#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker-impl.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
@@ -25,63 +28,266 @@ namespace lib {
libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>>
SuggestionProcessor::Create(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer) {
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store) {
ICING_RETURN_ERROR_IF_NULL(index);
+ ICING_RETURN_ERROR_IF_NULL(numeric_index);
ICING_RETURN_ERROR_IF_NULL(language_segmenter);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(document_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
return std::unique_ptr<SuggestionProcessor>(
- new SuggestionProcessor(index, language_segmenter, normalizer));
+ new SuggestionProcessor(index, numeric_index, language_segmenter,
+ normalizer, document_store, schema_store));
+}
+
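+// Translates the document uri filters in the suggestion spec into a map from
+// NamespaceId to the set of DocumentIds that suggestions may come from.
+// Returns INVALID_ARGUMENT if a group names a namespace excluded by the
+// namespace filter, or lists no document uris.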
+libtextclassifier3::StatusOr<
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>>
+PopulateDocumentIdFilters(
+ const DocumentStore* document_store,
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ const std::unordered_set<NamespaceId>& namespace_ids) {
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map;
+ document_id_filter_map.reserve(suggestion_spec.document_uri_filters_size());
+ for (const NamespaceDocumentUriGroup& namespace_document_uri_group :
+ suggestion_spec.document_uri_filters()) {
+ auto namespace_id_or = document_store->GetNamespaceId(
+ namespace_document_uri_group.namespace_());
+ if (!namespace_id_or.ok()) {
+ // The current namespace doesn't exist.
+ continue;
+ }
+ NamespaceId namespace_id = namespace_id_or.ValueOrDie();
+ if (!namespace_ids.empty() &&
+ namespace_ids.find(namespace_id) == namespace_ids.end()) {
+ // The current namespace doesn't appear in the namespace filter.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The namespace : ", namespace_document_uri_group.namespace_(),
+ " appears in the document uri filter, but doesn't appear in the "
+ "namespace filter."));
+ }
+
+ if (namespace_document_uri_group.document_uris().empty()) {
+ // Clients should use the namespace filter to filter out all documents
+ // under a namespace.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The namespace : ", namespace_document_uri_group.namespace_(),
+ " has empty document uri in the document uri filter. Please use the "
+ "namespace filter to exclude a namespace instead of the document uri "
+ "filter."));
+ }
+
+ // Translate namespace document Uris into document_ids
+ std::unordered_set<DocumentId> target_document_ids;
+ target_document_ids.reserve(
+ namespace_document_uri_group.document_uris_size());
+ for (std::string_view document_uri :
+ namespace_document_uri_group.document_uris()) {
+ auto document_id_or = document_store->GetDocumentId(
+ namespace_document_uri_group.namespace_(), document_uri);
+ if (!document_id_or.ok()) {
+ continue;
+ }
+ target_document_ids.insert(document_id_or.ValueOrDie());
+ }
+ document_id_filter_map.insert({namespace_id, target_document_ids});
+ }
+ return document_id_filter_map;
+}
+
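+// Translates the type property filters in the suggestion spec into a map from
+// SchemaTypeId to a SectionIdMask of the properties that suggestions may come
+// from. Returns INVALID_ARGUMENT if a mask names a schema type excluded by the
+// schema type filter, or lists no property paths.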
+libtextclassifier3::StatusOr<std::unordered_map<SchemaTypeId, SectionIdMask>>
+PopulatePropertyFilters(
+ const SchemaStore* schema_store,
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ const std::unordered_set<SchemaTypeId>& schema_type_ids) {
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map;
+ property_filter_map.reserve(suggestion_spec.type_property_filters_size());
+ for (const TypePropertyMask& type_field_mask :
+ suggestion_spec.type_property_filters()) {
+ auto schema_type_id_or =
+ schema_store->GetSchemaTypeId(type_field_mask.schema_type());
+ if (!schema_type_id_or.ok()) {
+ // The current schema doesn't exist
+ continue;
+ }
+ SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
+
+ if (!schema_type_ids.empty() &&
+ schema_type_ids.find(schema_type_id) == schema_type_ids.end()) {
+ // The current schema type doesn't appear in the schema type filter.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The schema : ", type_field_mask.schema_type(),
+ " appears in the property filter, but doesn't appear in the schema"
+ " type filter."));
+ }
+
+ if (type_field_mask.paths().empty()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The schema type : ", type_field_mask.schema_type(),
+ " has empty path in the property filter. Please use the schema type"
+ " filter to exclude a schema type instead of the property filter."));
+ }
+
+ // Translate property paths into section id mask
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list_or =
+ schema_store->GetSectionMetadata(type_field_mask.schema_type());
+ if (!section_metadata_list_or.ok()) {
+ // The current schema doesn't have section metadata.
+ continue;
+ }
+ std::unordered_set<std::string> target_property_paths;
+ target_property_paths.reserve(type_field_mask.paths_size());
+ for (const std::string& target_property_path : type_field_mask.paths()) {
+ target_property_paths.insert(target_property_path);
+ }
+ const std::vector<SectionMetadata>* section_metadata_list =
+ section_metadata_list_or.ValueOrDie();
+ for (const SectionMetadata& section_metadata : *section_metadata_list) {
+ if (target_property_paths.find(section_metadata.path) !=
+ target_property_paths.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ property_filter_map.insert({schema_type_id, section_mask});
+ }
+ return property_filter_map;
}
libtextclassifier3::StatusOr<std::vector<TermMetadata>>
SuggestionProcessor::QuerySuggestions(
- const icing::lib::SuggestionSpecProto& suggestion_spec,
- const SuggestionResultChecker* suggestion_result_checker) {
+ const icing::lib::SuggestionSpecProto& suggestion_spec) {
// We use query tokenizer to tokenize the give prefix, and we only use the
// last token to be the suggestion prefix.
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<Tokenizer> tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
- tokenizer->Tokenize(suggestion_spec.prefix()));
-
- // If there are previous tokens, they are prepended to the suggestion,
- // separated by spaces.
- std::string last_token;
- int token_start_pos;
- while (iterator->Advance()) {
- for (const Token& token : iterator->GetTokens()) {
- last_token = token.text;
- token_start_pos = token.text.data() - suggestion_spec.prefix().c_str();
+
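+ // The prefix is parsed with the query processor and the right-most term is
+ // trimmed off to act as the suggestion prefix; the remaining terms, together
+ // with the filters below, constrain which documents suggestions may come
+ // from.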
+ // Populate target namespace filter.
+ std::unordered_set<NamespaceId> namespace_ids;
+ namespace_ids.reserve(suggestion_spec.namespace_filters_size());
+ for (std::string_view name_space : suggestion_spec.namespace_filters()) {
+ auto namespace_id_or = document_store_.GetNamespaceId(name_space);
+ if (!namespace_id_or.ok()) {
+ // The current namespace doesn't exist.
+ continue;
}
+ namespace_ids.insert(namespace_id_or.ValueOrDie());
+ }
+ if (namespace_ids.empty() && !suggestion_spec.namespace_filters().empty()) {
+ // None of the desired namespaces exist, so return directly.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target document id filter.
+ auto document_id_filter_map_or = PopulateDocumentIdFilters(
+ &document_store_, suggestion_spec, namespace_ids);
+ if (!document_id_filter_map_or.ok()) {
+ return std::move(document_id_filter_map_or).status();
+ }
+
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map = document_id_filter_map_or.ValueOrDie();
+ if (document_id_filter_map.empty() &&
+ !suggestion_spec.document_uri_filters().empty()) {
+ // None of the desired DocumentIds exist, so return directly.
+ return std::vector<TermMetadata>();
}
+ // Populate target schema type filter.
+ std::unordered_set<SchemaTypeId> schema_type_ids;
+ schema_type_ids.reserve(suggestion_spec.schema_type_filters_size());
+ for (std::string_view schema_type : suggestion_spec.schema_type_filters()) {
+ auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ if (!schema_type_id_or.ok()) {
+ continue;
+ }
+ schema_type_ids.insert(schema_type_id_or.ValueOrDie());
+ }
+ if (schema_type_ids.empty() &&
+ !suggestion_spec.schema_type_filters().empty()) {
+ // None of the desired schema types exist, so return directly.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target properties filter.
+ auto property_filter_map_or =
+ PopulatePropertyFilters(&schema_store_, suggestion_spec, schema_type_ids);
+ if (!property_filter_map_or.ok()) {
+ return std::move(property_filter_map_or).status();
+ }
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map =
+ property_filter_map_or.ValueOrDie();
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QueryProcessor> query_processor,
+ QueryProcessor::Create(&index_, &numeric_index_, &language_segmenter_,
+ &normalizer_, &document_store_, &schema_store_));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query(suggestion_spec.prefix());
+ search_spec.set_term_match_type(
+ suggestion_spec.scoring_spec().scoring_match_type());
+ ICING_ASSIGN_OR_RETURN(
+ QueryResults query_results,
+ query_processor->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE));
+
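+ // Trim the right-most term out of the parsed query tree. Its text becomes
+ // the term to complete; the iterator over the remaining terms (if any) is
+ // used below to build the search base.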
+ ICING_ASSIGN_OR_RETURN(
+ DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*query_results.root_iterator).TrimRightMostNode());
+
// If the position of the last token is not the end of the prefix, it means
// there should be some operator tokens after it and are ignored by the
// tokenizer.
- bool is_last_token = token_start_pos + last_token.length() >=
- suggestion_spec.prefix().length();
+ bool is_last_token =
+ trimmed_node.term_start_index_ + trimmed_node.unnormalized_term_length_ >=
+ suggestion_spec.prefix().length();
- if (!is_last_token || last_token.empty()) {
+ if (!is_last_token || trimmed_node.term_.empty()) {
// We don't have a valid last token, return early.
return std::vector<TermMetadata>();
}
+ // Populate the search base in document ids.
+ // Suggestions are only generated for the very last term;
+ // trimmed_node.iterator_ tracks search results for all previous terms. If it
+ // is null, there is no previous term and we are generating a suggestion for a
+ // single term.
+ std::unordered_set<DocumentId> search_base;
+ if (trimmed_node.iterator_ != nullptr) {
+ while (trimmed_node.iterator_->Advance().ok()) {
+ search_base.insert(trimmed_node.iterator_->doc_hit_info().document_id());
+ }
+ if (search_base.empty()) {
+ // Nothing matches the previous terms in the query. There are no valid
+ // suggestions to make, we should return directly.
+ return std::vector<TermMetadata>();
+ }
+ }
+
+ // Create result checker based on given filters.
+ SuggestionResultCheckerImpl suggestion_result_checker_impl(
+ &document_store_, &schema_store_, std::move(namespace_ids),
+ std::move(document_id_filter_map), std::move(schema_type_ids),
+ std::move(property_filter_map), std::move(trimmed_node.target_section_),
+ std::move(search_base));
+ // TODO(b/228240987) support generating suggestions and appending suffixes
+ // for advanced queries and function calls.
std::string query_prefix =
- suggestion_spec.prefix().substr(0, token_start_pos);
+ suggestion_spec.prefix().substr(0, trimmed_node.term_start_index_);
// Run suggestion based on given SuggestionSpec.
// Normalize token text to lowercase since all tokens in the lexicon are
// lowercase.
ICING_ASSIGN_OR_RETURN(
std::vector<TermMetadata> terms,
index_.FindTermsByPrefix(
- normalizer_.NormalizeTerm(last_token),
- suggestion_spec.num_to_return(),
+ trimmed_node.term_, suggestion_spec.num_to_return(),
suggestion_spec.scoring_spec().scoring_match_type(),
- suggestion_spec.scoring_spec().rank_by(), suggestion_result_checker));
-
+ suggestion_spec.scoring_spec().rank_by(),
+ &suggestion_result_checker_impl));
for (TermMetadata& term : terms) {
term.content = query_prefix + term.content;
}
@@ -89,11 +295,15 @@ SuggestionProcessor::QuerySuggestions(
}
SuggestionProcessor::SuggestionProcessor(
- Index* index, const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer)
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ const DocumentStore* document_store, const SchemaStore* schema_store)
: index_(*index),
+ numeric_index_(*numeric_index),
language_segmenter_(*language_segmenter),
- normalizer_(*normalizer) {}
+ normalizer_(*normalizer),
+ document_store_(*document_store),
+ schema_store_(*schema_store) {}
} // namespace lib
} // namespace icing
diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h
index 97ced90..01ea9b9 100644
--- a/icing/query/suggestion-processor.h
+++ b/icing/query/suggestion-processor.h
@@ -17,7 +17,10 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
+#include "icing/index/numeric/numeric-index.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
@@ -37,8 +40,10 @@ class SuggestionProcessor {
// An SuggestionProcessor on success
// FAILED_PRECONDITION if any of the pointers is null.
static libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>>
- Create(Index* index, const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer);
+ Create(Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer, const DocumentStore* document_store,
+ const SchemaStore* schema_store);
// Query suggestions based on the given SuggestionSpecProto.
//
@@ -47,19 +52,24 @@ class SuggestionProcessor {
// - One vector that represents the entire TermMetadata
// INTERNAL_ERROR on all other errors
libtextclassifier3::StatusOr<std::vector<TermMetadata>> QuerySuggestions(
- const SuggestionSpecProto& suggestion_spec,
- const SuggestionResultChecker* suggestion_result_checker);
+ const SuggestionSpecProto& suggestion_spec);
private:
explicit SuggestionProcessor(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer);
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store);
// Not const because we could modify/sort the TermMetaData buffer in the lite
// index.
Index& index_;
+ const NumericIndex<int64_t>& numeric_index_;
const LanguageSegmenter& language_segmenter_;
const Normalizer& normalizer_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
};
} // namespace lib
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index d541620..e161099 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -15,8 +15,11 @@
#include "icing/query/suggestion-processor.h"
#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/term-metadata.h"
+#include "icing/schema-builder.h"
#include "icing/store/document-store.h"
-#include "icing/testing/always-true-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -33,19 +36,24 @@ namespace lib {
namespace {
using ::testing::IsEmpty;
+using ::testing::SizeIs;
using ::testing::Test;
+using ::testing::UnorderedElementsAre;
class SuggestionProcessorTest : public Test {
protected:
SuggestionProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
store_dir_(test_dir_ + "/store"),
- index_dir_(test_dir_ + "/index") {}
+ schema_store_dir_(test_dir_ + "/schema_store"),
+ index_dir_(test_dir_ + "/index"),
+ numeric_index_dir_(test_dir_ + "/numeric_index") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
// If we've specified using the reverse-JNI method for segmentation (i.e.
@@ -59,10 +67,24 @@ class SuggestionProcessorTest : public Test {
GetTestFilePath("icing/icu.dat")));
}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
+
Index::Options options(index_dir_,
/*index_merge_size=*/1024 * 1024);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+ // TODO(b/249829533): switch to use persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
@@ -74,13 +96,10 @@ class SuggestionProcessorTest : public Test {
/*max_term_byte_size=*/1000));
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ suggestion_processor_,
+ SuggestionProcessor::Create(
+ index_.get(), numeric_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get()));
}
libtextclassifier3::Status AddTokenToIndex(
@@ -93,232 +112,532 @@ class SuggestionProcessorTest : public Test {
}
void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
Filesystem filesystem_;
const std::string test_dir_;
const std::string store_dir_;
+ const std::string schema_store_dir_;
private:
IcingFilesystem icing_filesystem_;
const std::string index_dir_;
+ const std::string numeric_index_dir_;
protected:
std::unique_ptr<Index> index_;
+ std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
FakeClock fake_clock_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
+ std::unique_ptr<SuggestionProcessor> suggestion_processor_;
};
-constexpr DocumentId kDocumentId0 = 0;
constexpr SectionId kSectionId2 = 2;
-TEST_F(SuggestionProcessorTest, PrependedPrefixTokenTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+
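+ // Prefix "bar f": only documentId0 contains "bar", so the only expected
+ // completion for "f" is "foo".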
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
+ EXPECT_THAT(terms.at(0).content, "bar foo");
+ EXPECT_THAT(terms, SizeIs(1));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix(
- "prefix token should be prepended to the suggestion f");
+ suggestion_spec.set_prefix("bar cat f");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content,
- "prefix token should be prepended to the suggestion foo");
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
+ EXPECT_THAT(terms.at(0).content, "bar cat foo");
+ EXPECT_THAT(terms, SizeIs(1));
}
-TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+
+ // Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat
+ // foo" could match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar OR cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
+ std::vector<std::string> suggestions;
+ for (TermMetadata term : terms) {
+ suggestions.push_back(term.content);
+ }
+ EXPECT_THAT(suggestions,
+ UnorderedElementsAre("bar OR cat fo", "bar OR cat foo"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "3")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
+ TermMatchType::EXACT_ONLY, "lot"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ // Search for "((bar OR cat) OR lot) AND f"
+ suggestion_spec.set_prefix("bar OR cat OR lot f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
+ std::vector<std::string> suggestions;
+ for (TermMetadata term : terms) {
+ suggestions.push_back(term.content);
+ }
+ // "fo" in document1, "foo" in document2 and "fool" in document3 could match.
+ EXPECT_THAT(suggestions, UnorderedElementsAre("bar OR cat OR lot fo",
+ "bar OR cat OR lot foo",
+ "bar OR cat OR lot fool"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ // Search for "bar AND FO"
+ suggestion_spec.set_prefix("bar FO");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
+ std::vector<std::string> suggestions;
+ for (TermMetadata term : terms) {
+ suggestions.push_back(term.content);
+ }
+ // The term is normalized.
+ EXPECT_THAT(suggestions, UnorderedElementsAre("bar foo", "bar fool"));
+ suggestions.clear();
+ // Search for "bar AND ḞÖ"
+ suggestion_spec.set_prefix("bar ḞÖ");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
+ for (TermMetadata term : terms) {
+ suggestions.push_back(term.content);
+ }
+ // The term is normalized.
+ EXPECT_THAT(suggestions, UnorderedElementsAre("bar foo", "bar fool"));
+}
+
+TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("nonExistTerm");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
-
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("f ");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
-
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("F");
suggestion_spec.set_num_to_return(10);
-
- AlwaysTrueSuggestionResultCheckerImpl impl;
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms.at(0).content, "foo");
suggestion_spec.set_prefix("fO");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms.at(0).content, "foo");
suggestion_spec.set_prefix("Fo");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms.at(0).content, "foo");
suggestion_spec.set_prefix("FO");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
- EXPECT_THAT(terms.at(0).content, "foo");
-}
-
-TEST_F(SuggestionProcessorTest, OrOperatorPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "foo"),
- IsOk());
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
- TermMatchType::EXACT_ONLY, "original"),
- IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f OR");
- suggestion_spec.set_num_to_return(10);
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
- AlwaysTrueSuggestionResultCheckerImpl impl;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
-
- // Last Operator token will be used to query suggestion
- EXPECT_THAT(terms.at(0).content, "f original");
+ EXPECT_THAT(terms.at(0).content, "foo");
}
TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("{f}");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("[f]");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("(f)");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
-
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("f:");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("f-");
ICING_ASSERT_OK_AND_ASSIGN(
- terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("f OR");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
- ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "original"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SuggestionProcessor> suggestion_processor,
- SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get()));
-
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("OR OR - :");
suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
- AlwaysTrueSuggestionResultCheckerImpl impl;
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
- suggestion_processor->QuerySuggestions(suggestion_spec, &impl));
+ suggestion_processor_->QuerySuggestions(suggestion_spec));
EXPECT_THAT(terms, IsEmpty());
}
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
index 03bb849..fded576 100644
--- a/icing/result/projection-tree.cc
+++ b/icing/result/projection-tree.cc
@@ -16,9 +16,8 @@
#include <algorithm>
-#include "icing/absl_ports/str_join.h"
#include "icing/proto/search.pb.h"
-#include "icing/schema/section-manager.h"
+#include "icing/schema/property-util.h"
namespace icing {
namespace lib {
@@ -27,7 +26,7 @@ ProjectionTree::ProjectionTree(const TypePropertyMask& type_field_mask) {
for (const std::string& field_mask : type_field_mask.paths()) {
Node* current_node = &root_;
for (std::string_view sub_field_mask :
- absl_ports::StrSplit(field_mask, kPropertySeparator)) {
+ property_util::SplitPropertyPathExpr(field_mask)) {
current_node = AddChildNode(sub_field_mask, &current_node->children);
}
}
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index d6511e1..1176eaf 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -27,13 +27,12 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/absl_ports/str_join.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/tokenization/language-segmenter.h"
@@ -51,31 +50,13 @@ namespace lib {
namespace {
-const PropertyProto* GetProperty(const DocumentProto& document,
- std::string_view property_name) {
- for (const PropertyProto& property : document.properties()) {
- if (property.name() == property_name) {
- return &property;
- }
- }
- return nullptr;
-}
-
-inline std::string AddPropertyToPath(const std::string& current_path,
- std::string_view property) {
- if (current_path.empty()) {
- return std::string(property);
- }
- return absl_ports::StrCat(current_path, kPropertySeparator, property);
-}
-
inline std::string AddIndexToPath(int values_size, int index,
const std::string& property_path) {
if (values_size == 1) {
return property_path;
}
- return absl_ports::StrCat(property_path, kLBracket, std::to_string(index),
- kRBracket);
+ return absl_ports::StrCat(
+ property_path, property_util::ConvertToPropertyExprIndexStr(index));
}
// Returns a string of the normalized text of the input Token. Normalization
@@ -639,14 +620,14 @@ void RetrieveSnippetForSection(
SnippetProto* snippet_proto) {
std::string_view next_property_name = section_path.at(section_path_index);
const PropertyProto* current_property =
- GetProperty(document, next_property_name);
+ property_util::GetPropertyProto(document, next_property_name);
if (current_property == nullptr) {
ICING_VLOG(1) << "No property " << next_property_name << " found at path "
<< current_path;
return;
}
- std::string property_path =
- AddPropertyToPath(current_path, next_property_name);
+ std::string property_path = property_util::ConcatenatePropertyPathExpr(
+ current_path, next_property_name);
if (section_path_index == section_path.size() - 1) {
// We're at the end. Let's check our values.
GetEntriesFromProperty(current_property, property_path, matcher, tokenizer,
@@ -711,7 +692,7 @@ SnippetProto SnippetRetriever::RetrieveSnippet(
}
const SectionMetadata* metadata = section_metadata_or.ValueOrDie();
std::vector<std::string_view> section_path =
- absl_ports::StrSplit(metadata->path, kPropertySeparator);
+ property_util::SplitPropertyPathExpr(metadata->path);
// Match type must be as restrictive as possible. Prefix matches for a
// snippet should only be included if both the query is Prefix and the
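The property_util helpers referenced above take over the path handling that snippet-retriever.cc and projection-tree.cc previously did by hand with kPropertySeparator, kLBracket and kRBracket. The sketch below illustrates the expected behavior under that assumption (a "." path separator and a bracketed index expression); the function name is illustrative only, and the authoritative declarations live in icing/schema/property-util.h.

#include <string>
#include <string_view>
#include <vector>

#include "icing/absl_ports/str_cat.h"
#include "icing/schema/property-util.h"

namespace icing {
namespace lib {

void PropertyPathExprSketch() {
  // Joining path components: "sender" + "name" -> "sender.name".
  std::string path =
      property_util::ConcatenatePropertyPathExpr("sender", "name");

  // Splitting a path expression back into its components:
  // "sender.name" -> {"sender", "name"}.
  std::vector<std::string_view> components =
      property_util::SplitPropertyPathExpr(path);

  // Appending a repeated-value index, replacing the old kLBracket/kRBracket
  // concatenation in AddIndexToPath: "recipients" + 1 -> "recipients[1]".
  std::string indexed = absl_ports::StrCat(
      "recipients", property_util::ConvertToPropertyExprIndexStr(1));
}

}  // namespace lib
}  // namespace icing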
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
index 8d3aecb..d1ef42f 100644
--- a/icing/schema-builder.h
+++ b/icing/schema-builder.h
@@ -128,6 +128,13 @@ class PropertyConfigBuilder {
return *this;
}
+ PropertyConfigBuilder& SetJoinable(
+ JoinableConfig::ValueType::Code join_value_type, bool propagate_delete) {
+ property_.mutable_joinable_config()->set_value_type(join_value_type);
+ property_.mutable_joinable_config()->set_propagate_delete(propagate_delete);
+ return *this;
+ }
+
PropertyConfigBuilder& SetCardinality(
PropertyConfigProto::Cardinality::Code cardinality) {
property_.set_cardinality(cardinality);
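For reference, the new SetJoinable hook composes with the existing PropertyConfigBuilder setters. The joinable-property tests added below declare qualified-id properties roughly like this (a sketch reusing the shorthand enum aliases that appear in those tests):

PropertyConfigProto sender_qualified_id =
    PropertyConfigBuilder()
        .SetName("senderQualifiedId")
        .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
        .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
                     /*propagate_delete=*/true)
        .SetCardinality(CARDINALITY_OPTIONAL)
        .Build();

Only string properties with the QUALIFIED_ID joinable value type are treated as joinable by the new JoinablePropertyManager added below; other data types are skipped even if they carry a joinable config.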
diff --git a/icing/schema/joinable-property-manager-builder_test.cc b/icing/schema/joinable-property-manager-builder_test.cc
new file mode 100644
index 0000000..ac48faa
--- /dev/null
+++ b/icing/schema/joinable-property-manager-builder_test.cc
@@ -0,0 +1,446 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+
+class JoinablePropertyManagerBuilderTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(JoinablePropertyManagerBuilderTest, Build) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 1));
+
+ PropertyConfigProto prop_foo =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_bar =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_baz =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ // Add "foo" and "bar" to "SchemaTypeOne" (schema_type_id = 0).
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_bar, /*property_path=*/"bar"));
+ // Add "baz" to "SchemaTypeTwo" (schema_type_id = 1).
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/1, prop_baz, /*property_path=*/"baz"));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ // Check "SchemaTypeOne"
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList("SchemaTypeOne"),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"foo", prop_foo),
+ EqualsJoinablePropertyMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"bar",
+ prop_bar)))));
+ // Check "SchemaTypeTwo"
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList("SchemaTypeTwo"),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"baz", prop_baz)))));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest, TooManyPropertiesShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaType", 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ // Add kTotalNumJoinableProperties joinable properties
+ for (int i = 0; i < kTotalNumJoinableProperties; i++) {
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"property" + std::to_string(i)));
+ }
+
+ // Add another joinable property. This should fail.
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ EXPECT_THAT(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"propertyExceed"),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest, InvalidSchemaTypeIdShouldFail) {
+ // Create a valid schema type mapper; processing a property config with an
+ // invalid schema type id should fail.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaType", 0));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/-1, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest,
+ SchemaTypeIdInconsistentWithSchemaTypeMapperSizeShouldFail) {
+ // Create a schema type mapper that contains schema type id = 2 but holds
+ // only 2 entries.
+ // Since JoinablePropertyManager::Builder expects the 2 schema type ids to be
+ // [0, 1], processing a property with schema type id = 2 should fail even
+ // though id = 2 is present in the schema type mapper.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 2));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/2, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest,
+ NonStringPropertiesWithQualifiedIdJoinableConfigShouldNotProcess) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 1));
+
+ // Create non-string properties with QUALIFIED_ID joinable value type.
+ std::vector<PropertyConfigProto> properties = {
+ PropertyConfigBuilder()
+ .SetName("int1")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("int2")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("double1")
+ .SetDataType(TYPE_DOUBLE)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("double2")
+ .SetDataType(TYPE_DOUBLE)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("boolean1")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("boolean2")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("bytes1")
+ .SetDataType(TYPE_BYTES)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("bytes2")
+ .SetDataType(TYPE_BYTES)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("document1")
+ .SetDataTypeDocument(/*schema_type=*/"SchemaTypeTwo",
+ /*index_nested_properties=*/true)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("document2")
+ .SetDataTypeDocument(/*schema_type=*/"SchemaTypeTwo",
+ /*index_nested_properties=*/true)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build()};
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ for (const PropertyConfigProto& property_config : properties) {
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ std::string(property_config.property_name())));
+ }
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(joinable_property_manager->GetMetadataList("SchemaTypeOne"),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+class JoinablePropertyManagerBuilderWithJoinablePropertyTest
+ : public JoinablePropertyManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(JoinablePropertyManagerBuilderWithJoinablePropertyTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following type is considered joinable:
+// - String with QUALIFIED_ID joinable value type
+INSTANTIATE_TEST_SUITE_P(
+ JoinablePropertyManagerBuilderWithJoinablePropertyTest,
+ JoinablePropertyManagerBuilderWithJoinablePropertyTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ // Indexable string can be configured joinable as well. For
+ // convenience, just test one indexable string config.
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+class JoinablePropertyManagerBuilderWithNonJoinablePropertyTest
+ : public JoinablePropertyManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(JoinablePropertyManagerBuilderWithNonJoinablePropertyTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+// All types without JoinableConfig (i.e. joinable value type = NONE by default)
+// are considered non-joinable. Other mismatching types (e.g. non-string
+// properties with QUALIFIED_ID joinable value type) were tested individually
+// above.
+INSTANTIATE_TEST_SUITE_P(
+ JoinablePropertyManagerBuilderWithNonJoinablePropertyTest,
+ JoinablePropertyManagerBuilderWithNonJoinablePropertyTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ // Indexable but non-joinable string
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager.cc b/icing/schema/joinable-property-manager.cc
new file mode 100644
index 0000000..5f8f7b8
--- /dev/null
+++ b/icing/schema/joinable-property-manager.cc
@@ -0,0 +1,175 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/joinable-property-manager.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/property-util.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to append a new JoinablePropertyMetadata entry to the list.
+libtextclassifier3::Status AppendNewJoinablePropertyMetadata(
+ std::vector<JoinablePropertyMetadata>* metadata_list,
+ std::string&& concatenated_path,
+ PropertyConfigProto::DataType::Code data_type,
+ JoinableConfig::ValueType::Code value_type) {
+ // Validates the next joinable property id and makes sure that it equals the
+ // list index, so that any joinable property metadata can later be looked up
+ // by id in O(1).
+ JoinablePropertyId new_id =
+ static_cast<JoinablePropertyId>(metadata_list->size());
+ if (!IsJoinablePropertyIdValid(new_id)) {
+ // Max number of joinable properties reached
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Too many properties to be joinable, max "
+ "number of properties allowed: %d",
+ kTotalNumJoinableProperties));
+ }
+
+ // Creates joinable property metadata
+ metadata_list->push_back(JoinablePropertyMetadata(
+ new_id, data_type, value_type, std::move(concatenated_path)));
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+void AppendJoinablePropertyContent(
+ JoinablePropertyMetadata joinable_property_metadata,
+ libtextclassifier3::StatusOr<std::vector<T>>&& joinable_property_content_or,
+ std::vector<JoinableProperty<T>>& joinable_property_out) {
+ if (!joinable_property_content_or.ok()) {
+ return;
+ }
+
+ std::vector<T> joinable_property_content =
+ std::move(joinable_property_content_or).ValueOrDie();
+ if (!joinable_property_content.empty()) {
+ // Adds to result vector if joinable property is found in document
+ joinable_property_out.emplace_back(std::move(joinable_property_metadata),
+ std::move(joinable_property_content));
+ }
+}
+
+} // namespace
+
+libtextclassifier3::Status
+JoinablePropertyManager::Builder::ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path) {
+ if (schema_type_id < 0 ||
+ schema_type_id >= joinable_property_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (property_config.joinable_config().value_type() ==
+ JoinableConfig::ValueType::QUALIFIED_ID) {
+ ICING_RETURN_IF_ERROR(AppendNewJoinablePropertyMetadata(
+ &joinable_property_metadata_cache_[schema_type_id],
+ std::move(property_path), PropertyConfigProto::DataType::STRING,
+ JoinableConfig::ValueType::QUALIFIED_ID));
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<JoinablePropertyGroup>
+JoinablePropertyManager::ExtractJoinableProperties(
+ const DocumentProto& document) const {
+ ICING_ASSIGN_OR_RETURN(
+ const std::vector<JoinablePropertyMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
+ JoinablePropertyGroup joinable_property_group;
+ for (const JoinablePropertyMetadata& joinable_property_metadata :
+ *metadata_list) {
+ switch (joinable_property_metadata.data_type) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (joinable_property_metadata.value_type ==
+ JoinableConfig::ValueType::QUALIFIED_ID) {
+ AppendJoinablePropertyContent(
+ joinable_property_metadata,
+ property_util::ExtractPropertyValuesFromDocument<
+ std::string_view>(document, joinable_property_metadata.path),
+ joinable_property_group.qualified_id_properties);
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
+ }
+ }
+ return joinable_property_group;
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+JoinablePropertyManager::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const {
+ if (schema_type_id < 0 ||
+ schema_type_id >= joinable_property_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+ if (!IsJoinablePropertyIdValid(joinable_property_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Invalid joinable property id %d", joinable_property_id));
+ }
+
+ const std::vector<JoinablePropertyMetadata>& joinable_property_metadatas =
+ joinable_property_metadata_cache_[schema_type_id];
+ if (joinable_property_id >= joinable_property_metadatas.size()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Joinable property with id %d doesn't exist in type config id %d",
+ joinable_property_id, schema_type_id));
+ }
+
+ // The index of metadata list is the same as the joinable property id, so we
+ // can use joinable property id as the index.
+ return &joinable_property_metadatas[joinable_property_id];
+}
+
+libtextclassifier3::StatusOr<const std::vector<JoinablePropertyMetadata>*>
+JoinablePropertyManager::GetMetadataList(
+ const std::string& type_config_name) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper_.Get(type_config_name));
+ return &joinable_property_metadata_cache_.at(schema_type_id);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager.h b/icing/schema/joinable-property-manager.h
new file mode 100644
index 0000000..a175ae4
--- /dev/null
+++ b/icing/schema/joinable-property-manager.h
@@ -0,0 +1,136 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
+#define ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// This class provides joinable-property-related operations. It assigns joinable
+// properties according to JoinableConfig and extracts joinable property values
+// from documents.
+class JoinablePropertyManager {
+ public:
+ // Builder class to create a JoinablePropertyManager. The builder does not
+ // take ownership of any input components, and all of them must remain valid
+ // and outlive the created JoinablePropertyManager instance.
+ class Builder {
+ public:
+ explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
+ : schema_type_mapper_(schema_type_mapper),
+ joinable_property_metadata_cache_(schema_type_mapper.num_keys()) {}
+
+ // Checks and appends a new JoinablePropertyMetadata for the schema type id
+ // if the given property config is joinable.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
+ // schema_type_mapper_.num_keys() - 1])
+ // - OUT_OF_RANGE_ERROR if # of joinable properties in a single Schema
+ // exceeds the threshold (kTotalNumJoinableProperties)
+ libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path);
+
+ // Builds and returns a JoinablePropertyManager instance.
+ std::unique_ptr<JoinablePropertyManager> Build() && {
+ return std::unique_ptr<JoinablePropertyManager>(
+ new JoinablePropertyManager(
+ schema_type_mapper_,
+ std::move(joinable_property_metadata_cache_)));
+ }
+
+ private:
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own.
+ std::vector<std::vector<JoinablePropertyMetadata>>
+ joinable_property_metadata_cache_;
+ };
+
+ JoinablePropertyManager(const JoinablePropertyManager&) = delete;
+ JoinablePropertyManager& operator=(const JoinablePropertyManager&) = delete;
+
+ // Extracts all joinable property contents of different types from the given
+ // document and groups them by joinable value type.
+ // - Joinable properties are sorted by joinable property id in ascending
+ // order.
+ // - Joinable property ids start from 0.
+ // - Joinable properties with empty content won't be returned.
+ //
+ // Returns:
+ // - A JoinablePropertyGroup instance on success
+ // - NOT_FOUND_ERROR if the type config name of document is not present in
+ // schema_type_mapper_
+ libtextclassifier3::StatusOr<JoinablePropertyGroup> ExtractJoinableProperties(
+ const DocumentProto& document) const;
+
+ // Returns the JoinablePropertyMetadata associated with the given
+ // JoinablePropertyId of the given SchemaTypeId.
+ //
+ // Returns:
+ // - Pointer to JoinablePropertyMetadata on success
+ // - INVALID_ARGUMENT_ERROR if schema type id or JoinablePropertyId is
+ // invalid
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const;
+
+ // Returns:
+ // - On success, the joinable property metadatas for the specified type
+ // - NOT_FOUND_ERROR if the type config name is not present in
+ // schema_type_mapper_
+ libtextclassifier3::StatusOr<const std::vector<JoinablePropertyMetadata>*>
+ GetMetadataList(const std::string& type_config_name) const;
+
+ private:
+ explicit JoinablePropertyManager(
+ const KeyMapper<SchemaTypeId>& schema_type_mapper,
+ std::vector<std::vector<JoinablePropertyMetadata>>&&
+ joinable_property_metadata_cache)
+ : schema_type_mapper_(schema_type_mapper),
+ joinable_property_metadata_cache_(joinable_property_metadata_cache) {}
+
+ // Maps schema types to a densely-assigned unique id.
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own
+
+ // The index of joinable_property_metadata_cache_ corresponds to a schema
+ // type's SchemaTypeId. At that SchemaTypeId index, we store an inner vector.
+ // The inner vector's index corresponds to a joinable property's
+ // JoinablePropertyId. At the JoinablePropertyId index, we store the
+ // JoinablePropertyMetadata of that joinable property.
+ //
+ // For example, suppose "email" has a SchemaTypeId of 0 and it has a joinable
+ // property called "senderQualifiedId" with a JoinablePropertyId of 1. Then
+ // the "senderQualifiedId" property's JoinablePropertyMetadata will be at
+ // joinable_property_metadata_cache_[0][1].
+ const std::vector<std::vector<JoinablePropertyMetadata>>
+ joinable_property_metadata_cache_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
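Putting the pieces together, the intended flow is: register every joinable property of every schema type with JoinablePropertyManager::Builder (production code does this via SchemaTypeManager, as the tests below show), build the manager, then extract joinable values per document. A rough usage sketch based on the API above; the helper name is illustrative, error handling is minimal, and the schema type mapper is assumed to already map the document's schema type to id 0.

#include <memory>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/schema/joinable-property-manager.h"
#include "icing/schema/joinable-property.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

libtextclassifier3::StatusOr<JoinablePropertyGroup> ExtractQualifiedIds(
    const KeyMapper<SchemaTypeId>& schema_type_mapper,
    const PropertyConfigProto& qualified_id_property,
    const DocumentProto& document) {
  // Register the (string, QUALIFIED_ID) property for schema type id 0.
  JoinablePropertyManager::Builder builder(schema_type_mapper);
  ICING_RETURN_IF_ERROR(builder.ProcessSchemaTypePropertyConfig(
      /*schema_type_id=*/0, qualified_id_property,
      /*property_path=*/"senderQualifiedId"));
  std::unique_ptr<JoinablePropertyManager> manager = std::move(builder).Build();

  // Qualified id values (e.g. "pkg$db/ns#Person1") come back grouped in
  // JoinablePropertyGroup::qualified_id_properties, sorted by joinable
  // property id; properties with empty content are omitted.
  return manager->ExtractJoinableProperties(document);
}

}  // namespace lib
}  // namespace icing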
diff --git a/icing/schema/joinable-property-manager_test.cc b/icing/schema/joinable-property-manager_test.cc
new file mode 100644
index 0000000..495c254
--- /dev/null
+++ b/icing/schema/joinable-property-manager_test.cc
@@ -0,0 +1,430 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/joinable-property-manager.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-type-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+// type and property names of Email
+static constexpr char kTypeEmail[] = "Email";
+// joinable
+static constexpr char kPropertyReceiverQualifiedId[] = "receiverQualifiedId";
+static constexpr char kPropertySenderQualifiedId[] = "senderQualifiedId";
+// non-joinable
+static constexpr char kPropertyAttachment[] = "attachment";
+static constexpr char kPropertySubject[] = "subject";
+static constexpr char kPropertyText[] = "text";
+static constexpr char kPropertyTimestamp[] = "timestamp";
+
+// type and property names of Conversation
+static constexpr char kTypeConversation[] = "Conversation";
+// joinable
+static constexpr char kPropertyEmails[] = "emails";
+static constexpr char kPropertyGroupQualifiedId[] = "groupQualifiedId";
+// non-joinable
+static constexpr char kPropertyName[] = "name";
+static constexpr char kPropertyNumber[] = "number";
+
+constexpr int64_t kDefaultTimestamp = 1663274901;
+
+PropertyConfigProto CreateSenderQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySenderQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateReceiverQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyReceiverQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(CreateSenderQualifiedIdPropertyConfig())
+ .AddProperty(CreateReceiverQualifiedIdPropertyConfig())
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNumber)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(CreateGroupQualifiedIdPropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+}
+
+class JoinablePropertyManagerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+
+ type_config_map_.emplace(kTypeEmail, CreateEmailTypeConfig());
+ type_config_map_.emplace(kTypeConversation, CreateConversationTypeConfig());
+
+ email_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddStringProperty(kPropertySenderQualifiedId, "pkg$db/ns#Person1")
+ .AddStringProperty(kPropertyReceiverQualifiedId,
+ "pkg$db/ns#Person2")
+ .AddBytesProperty(kPropertyAttachment, "attachment")
+ .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
+ .Build();
+
+ conversation_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, "the conversation")
+ .AddInt64Property(kPropertyNumber, 2)
+ .AddDocumentProperty(kPropertyEmails,
+ DocumentProto(email_document_))
+ .AddStringProperty(kPropertyGroupQualifiedId,
+ "pkg$db/ns#GroupQualifiedId1")
+ .Build();
+
+    // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+    // one 128KiB so the total DynamicTrieKeyMapper size is 384KiB.
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+ }
+
+ void TearDown() override {
+ schema_type_mapper_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ SchemaUtil::TypeConfigMap type_config_map_;
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+ DocumentProto email_document_;
+ DocumentProto conversation_document_;
+};
+
+TEST_F(JoinablePropertyManagerTest, ExtractJoinableProperties) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+  // Extracts all joinable properties from the 'Email' document.
+ ICING_ASSERT_OK_AND_ASSIGN(JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(email_document_));
+
+ // Qualified Id joinable properties
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(2));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person2"));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[1].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].values,
+ ElementsAre("pkg$db/ns#Person1"));
+}
+
+TEST_F(JoinablePropertyManagerTest, ExtractJoinablePropertiesNested) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+  // Extracts all joinable properties from the 'Conversation' document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(conversation_document_));
+
+ // Qualified Id joinable properties
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(3));
+
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person2"));
+
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].values,
+ ElementsAre("pkg$db/ns#Person1"));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[2].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[2].values,
+ ElementsAre("pkg$db/ns#GroupQualifiedId1"));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ ExtractJoinablePropertiesShouldIgnoreEmptyContents) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Create an email document without receiverQualifiedId.
+ DocumentProto another_email_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddBytesProperty(kPropertyAttachment, "attachment")
+ .AddStringProperty(kPropertySenderQualifiedId, "pkg$db/ns#Person1")
+ .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(another_email_document));
+
+ // ExtractJoinableProperties should ignore receiverQualifiedId and not append
+ // a JoinableProperty instance of it into the vector.
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(1));
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person1"));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadata) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/0),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/1),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/0),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/1),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/2),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()))));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(2));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/-1,
+ /*joinable_property_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/2,
+ /*joinable_property_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataInvalidJoinablePropertyId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Note: the success cases of GetMetadataList are covered in
+// JoinablePropertyManagerBuildTest.
+TEST_F(JoinablePropertyManagerTest, GetMetadataListInvalidSchemaTypeName) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager().GetMetadataList(
+ "NonExistingSchemaTypeName"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property.h b/icing/schema/joinable-property.h
new file mode 100644
index 0000000..057bb74
--- /dev/null
+++ b/icing/schema/joinable-property.h
@@ -0,0 +1,132 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_JOINABLE_PROPERTY_H_
+#define ICING_SCHEMA_JOINABLE_PROPERTY_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+using JoinablePropertyId = int8_t;
+
+// 6 bits for 64 values.
+inline constexpr int kJoinablePropertyIdBits = 6;
+inline constexpr JoinablePropertyId kTotalNumJoinableProperties =
+ (INT8_C(1) << kJoinablePropertyIdBits);
+inline constexpr JoinablePropertyId kInvalidJoinablePropertyId =
+ kTotalNumJoinableProperties;
+inline constexpr JoinablePropertyId kMaxJoinablePropertyId =
+ kTotalNumJoinableProperties - 1;
+inline constexpr JoinablePropertyId kMinJoinablePropertyId = 0;
+
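+// With kJoinablePropertyIdBits = 6, valid joinable property ids fall in the
+// range [kMinJoinablePropertyId, kMaxJoinablePropertyId] = [0, 63];
+// kInvalidJoinablePropertyId (= 64) is reserved as the invalid sentinel.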
+constexpr bool IsJoinablePropertyIdValid(
+ JoinablePropertyId joinable_property_id) {
+ return joinable_property_id >= kMinJoinablePropertyId &&
+ joinable_property_id <= kMaxJoinablePropertyId;
+}
+
+static_assert(
+ kJoinablePropertyIdBits < 8 * sizeof(JoinablePropertyId),
+ "Cannot exhaust all bits of JoinablePropertyId since it is a signed "
+ "integer and the most significant bit should be preserved.");
+
+struct JoinablePropertyMetadata {
+  // Dot-joined property names representing the location of the joinable
+  // property inside a document. E.g. "property1.property2".
+ std::string path;
+
+  // A unique id of the joinable property.
+ JoinablePropertyId id;
+
+  // Data type of this joinable property's values. Currently we only support
+  // STRING.
+ PropertyConfigProto::DataType::Code data_type;
+
+ // How values will be used as a joining matcher.
+ //
+ // JoinableConfig::ValueType::QUALIFIED_ID:
+ // Value in this property is a joinable (string) qualified id. Qualified id
+ // is composed of namespace and uri, and it will be used as the identifier
+ // of the parent document. Note: it is invalid to use this value type with
+ // non-string DataType.
+ JoinableConfig::ValueType::Code value_type;
+
+ explicit JoinablePropertyMetadata(
+ JoinablePropertyId id_in,
+ PropertyConfigProto::DataType::Code data_type_in,
+ JoinableConfig::ValueType::Code value_type_in, std::string&& path_in)
+ : path(std::move(path_in)),
+ id(id_in),
+ data_type(data_type_in),
+ value_type(value_type_in) {}
+
+ JoinablePropertyMetadata(const JoinablePropertyMetadata& other) = default;
+ JoinablePropertyMetadata& operator=(const JoinablePropertyMetadata& other) =
+ default;
+
+ JoinablePropertyMetadata(JoinablePropertyMetadata&& other) = default;
+ JoinablePropertyMetadata& operator=(JoinablePropertyMetadata&& other) =
+ default;
+
+ bool operator==(const JoinablePropertyMetadata& rhs) const {
+ return path == rhs.path && id == rhs.id && data_type == rhs.data_type &&
+ value_type == rhs.value_type;
+ }
+};
+
+// JoinableProperty is an icing internal concept similar to document property
+// values (contents), but with extra metadata. The data type of the values is
+// specified by the template parameter.
+//
+// Current supported data types:
+// - std::string_view (PropertyConfigProto::DataType::STRING)
+template <typename T>
+struct JoinableProperty {
+ JoinablePropertyMetadata metadata;
+ std::vector<T> values;
+
+ explicit JoinableProperty(JoinablePropertyMetadata&& metadata_in,
+ std::vector<T>&& values_in)
+ : metadata(std::move(metadata_in)), values(std::move(values_in)) {}
+
+ PropertyConfigProto::DataType::Code data_type() const {
+ return metadata.data_type;
+ }
+
+ JoinableConfig::ValueType::Code value_type() const {
+ return metadata.value_type;
+ }
+};
+
+// Groups joinable properties of different types. Callers can access the
+// joinable properties of the types they want and avoid going through
+// non-desired ones.
+//
+// REQUIRES: the lifecycle of the property must be longer than this object,
+// since we use std::string_view to reference its string_values.
+struct JoinablePropertyGroup {
+ std::vector<JoinableProperty<std::string_view>> qualified_id_properties;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_JOINABLE_PROPERTY_H_
diff --git a/icing/schema/property-util.cc b/icing/schema/property-util.cc
new file mode 100644
index 0000000..7370328
--- /dev/null
+++ b/icing/schema/property-util.cc
@@ -0,0 +1,122 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/property-util.h"
+
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace property_util {
+
+std::string ConvertToPropertyExprIndexStr(int index) {
+ if (index == kWildcardPropertyIndex) {
+ return "";
+ }
+ return absl_ports::StrCat(kLBracket, std::to_string(index), kRBracket);
+}
+
+std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
+ std::string_view property_path_expr2) {
+ if (property_path_expr1.empty()) {
+ return std::string(property_path_expr2);
+ }
+ if (property_path_expr2.empty()) {
+ return std::string(property_path_expr1);
+ }
+ return absl_ports::StrCat(property_path_expr1, kPropertyPathSeparator,
+ property_path_expr2);
+}
+
+std::vector<std::string_view> SplitPropertyPathExpr(
+ std::string_view property_path_expr) {
+ return absl_ports::StrSplit(property_path_expr, kPropertyPathSeparator);
+}
+
+PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr) {
+ size_t l_bracket = property_name_expr.find(kLBracket);
+ if (l_bracket == std::string_view::npos ||
+ l_bracket >= property_name_expr.length()) {
+ return PropertyInfo(std::string(property_name_expr),
+ kWildcardPropertyIndex);
+ }
+ size_t r_bracket = property_name_expr.find(kRBracket, l_bracket);
+ if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
+ return PropertyInfo(std::string(property_name_expr),
+ kWildcardPropertyIndex);
+ }
+ std::string index_string = std::string(
+ property_name_expr.substr(l_bracket + 1, r_bracket - l_bracket - 1));
+ return PropertyInfo(std::string(property_name_expr.substr(0, l_bracket)),
+ std::stoi(index_string));
+}
+
+std::vector<PropertyInfo> ParsePropertyPathExpr(
+ std::string_view property_path_expr) {
+ std::vector<std::string_view> property_name_exprs =
+ SplitPropertyPathExpr(property_path_expr);
+
+ std::vector<PropertyInfo> property_infos;
+ property_infos.reserve(property_name_exprs.size());
+ for (std::string_view property_name_expr : property_name_exprs) {
+ property_infos.push_back(ParsePropertyNameExpr(property_name_expr));
+ }
+ return property_infos;
+}
+
+const PropertyProto* GetPropertyProto(const DocumentProto& document,
+ std::string_view property_name) {
+ for (const PropertyProto& property : document.properties()) {
+ if (property.name() == property_name) {
+ return &property;
+ }
+ }
+ return nullptr;
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string>>
+ExtractPropertyValues<std::string>(const PropertyProto& property) {
+ return std::vector<std::string>(property.string_values().begin(),
+ property.string_values().end());
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+ExtractPropertyValues<std::string_view>(const PropertyProto& property) {
+ return std::vector<std::string_view>(property.string_values().begin(),
+ property.string_values().end());
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<int64_t>>
+ExtractPropertyValues<int64_t>(const PropertyProto& property) {
+ return std::vector<int64_t>(property.int64_values().begin(),
+ property.int64_values().end());
+}
+
+} // namespace property_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/property-util.h b/icing/schema/property-util.h
new file mode 100644
index 0000000..efa599c
--- /dev/null
+++ b/icing/schema/property-util.h
@@ -0,0 +1,192 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
+#define ICING_SCHEMA_PROPERTY_UTIL_H_
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace property_util {
+
+// Definition:
+// - Expr (short for expression): with or without index.
+// - property_name: one level of property name without index. E.g. "abc", "def".
+// - property_name_expr: one level of property name with or without index. E.g.
+// "abc", "abc[0]", "def[1]".
+// - property_path: multiple levels (including one) of property names without
+// indices. E.g. "abc", "abc.def".
+// - property_path_expr: multiple levels (including one) of property name
+// expressions. E.g. "abc", "abc[0]", "abc.def",
+// "abc[0].def", "abc[0].def[1]".
+//
+// Set relationship graph (A -> B: A is a subset of B):
+//
+// property_path -> property_path_expr
+// ^ ^
+// | |
+// property_name -> property_name_expr
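+//
+// For example, the property_path_expr "abc[0].def" consists of the
+// property_name_exprs "abc[0]" and "def", whose property_names are "abc"
+// (with index 0) and "def" (without an index).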
+inline constexpr std::string_view kPropertyPathSeparator = ".";
+inline constexpr std::string_view kLBracket = "[";
+inline constexpr std::string_view kRBracket = "]";
+
+inline constexpr int kWildcardPropertyIndex = -1;
+
+struct PropertyInfo {
+ std::string name;
+ int index;
+
+ explicit PropertyInfo(std::string name_in, int index_in)
+ : name(std::move(name_in)), index(index_in) {}
+};
+
+// Converts a property (value) index to string, wrapped by kLBracket and
+// kRBracket.
+//
+// REQUIRES: index should be valid or kWildcardPropertyIndex.
+//
+// Returns:
+// - "" if index is kWildcardPropertyIndex.
+// - kLBracket + std::to_string(index) + kRBracket for all non
+// kWildcardPropertyIndex indices.
+std::string ConvertToPropertyExprIndexStr(int index);
+
+// Concatenates 2 property path expressions.
+//
+// Returns:
+// - property_path_expr1 + "." + property_path_expr2 if both are not empty.
+// - property_path_expr1 if property_path_expr2 is empty.
+// - property_path_expr2 if property_path_expr1 is empty.
+// - "" if both are empty.
+std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
+ std::string_view property_path_expr2);
+
+// Splits a property path expression into multiple property name expressions.
+//
+// Returns: a vector of property name expressions.
+std::vector<std::string_view> SplitPropertyPathExpr(
+ std::string_view property_path_expr);
+
+// Parses a property name expression into (property name, property index). If
+// the index expression is missing, then the returned property index will be
+// kWildcardPropertyIndex.
+//
+// Examples:
+// - ParsePropertyNameExpr("foo") will return ("foo",
+// kWildcardPropertyIndex).
+// - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
+//
+// Returns: a PropertyInfo instance.
+PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
+
+// Parses a property path expression into multiple (property name, property
+// index) pairs. It is similar to ParsePropertyNameExpr, except that a property
+// path expression can contain multiple name expressions.
+//
+// Examples:
+// - ParsePropertyPathExpr("foo") will return [("foo",
+// kWildcardPropertyIndex)].
+// - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
+// - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
+// kWildcardPropertyIndex), ("bar", 2)]
+//
+// Returns: a vector of PropertyInfo instances.
+std::vector<PropertyInfo> ParsePropertyPathExpr(
+ std::string_view property_path_expr);
+
+// Gets the desired PropertyProto from the document by the given property name.
+// Since the input parameter is a property name, this function only deals with
+// the first level of properties in the document and cannot deal with nested
+// documents.
+//
+// Returns:
+//   - const PropertyProto* if the property name exists in the document.
+//   - nullptr if the property name is not found.
+const PropertyProto* GetPropertyProto(const DocumentProto& document,
+ std::string_view property_name);
+
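+// Extracts all values of type T from the given property.
+//
+// Returns:
+//   - A vector of all T values of the property on success.
+//   - UNIMPLEMENTED_ERROR if type T is not supported. Only std::string,
+//     std::string_view and int64_t specializations are provided below.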
+template <typename T>
+libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
+ const PropertyProto& property) {
+ return absl_ports::UnimplementedError(
+ "Unimplemented template type for ExtractPropertyValues");
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string>>
+ExtractPropertyValues<std::string>(const PropertyProto& property);
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+ExtractPropertyValues<std::string_view>(const PropertyProto& property);
+
+template <>
+libtextclassifier3::StatusOr<std::vector<int64_t>>
+ExtractPropertyValues<int64_t>(const PropertyProto& property);
+
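+// Extracts all values of type T at the given property path from the document,
+// recursing into nested document properties level by level.
+//
+// Returns:
+//   - A vector of all matching T values on success. If the property path does
+//     not exist in the document (e.g. an absent optional property), an empty
+//     vector is returned.
+//   - UNIMPLEMENTED_ERROR if type T is not supported by
+//     ExtractPropertyValues.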
+template <typename T>
+libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
+ const DocumentProto& document, std::string_view property_path) {
+ // Finds the first property name in property_path
+ size_t separator_position = property_path.find(kPropertyPathSeparator);
+ std::string_view current_property_name =
+ (separator_position == std::string::npos)
+ ? property_path
+ : property_path.substr(0, separator_position);
+
+ const PropertyProto* property_proto =
+ GetPropertyProto(document, current_property_name);
+ if (property_proto == nullptr) {
+    // Property name not found. It could be one of the following 2 cases:
+    // 1. The property is optional and is not present in the document.
+    // 2. The property name is invalid.
+ return std::vector<T>();
+ }
+
+ if (separator_position == std::string::npos) {
+ // Current property name is the last one in property path.
+ return ExtractPropertyValues<T>(*property_proto);
+ }
+
+ // Extracts property values recursively
+ std::string_view sub_property_path =
+ property_path.substr(separator_position + 1);
+ std::vector<T> nested_document_content;
+ for (const DocumentProto& nested_document :
+ property_proto->document_values()) {
+ auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
+ sub_property_path);
+ if (content_or.ok()) {
+ std::vector<T> content = std::move(content_or).ValueOrDie();
+ std::move(content.begin(), content.end(),
+ std::back_inserter(nested_document_content));
+ }
+ }
+ return nested_document_content;
+}
+
+} // namespace property_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_PROPERTY_UTIL_H_
diff --git a/icing/schema/property-util_test.cc b/icing/schema/property-util_test.cc
new file mode 100644
index 0000000..1fabb32
--- /dev/null
+++ b/icing/schema/property-util_test.cc
@@ -0,0 +1,236 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/property-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+static constexpr std::string_view kTypeTest = "Test";
+static constexpr std::string_view kPropertySingleString = "singleString";
+static constexpr std::string_view kPropertyRepeatedString = "repeatedString";
+static constexpr std::string_view kPropertySingleInteger = "singleInteger";
+static constexpr std::string_view kPropertyRepeatedInteger = "repeatedInteger";
+
+static constexpr std::string_view kTypeNestedTest = "NestedTest";
+static constexpr std::string_view kPropertyStr = "str";
+static constexpr std::string_view kPropertyNestedDocument = "nestedDocument";
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeString) {
+ PropertyProto property;
+ property.mutable_string_values()->Add("Hello, world");
+ property.mutable_string_values()->Add("Foo");
+ property.mutable_string_values()->Add("Bar");
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string>(property),
+ IsOkAndHolds(ElementsAre("Hello, world", "Foo", "Bar")));
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(ElementsAre("Hello, world", "Foo", "Bar")));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeInteger) {
+ PropertyProto property;
+ property.mutable_int64_values()->Add(123);
+ property.mutable_int64_values()->Add(-456);
+ property.mutable_int64_values()->Add(0);
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<int64_t>(property),
+ IsOkAndHolds(ElementsAre(123, -456, 0)));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesMismatchedType) {
+ PropertyProto property;
+ property.mutable_int64_values()->Add(123);
+ property.mutable_int64_values()->Add(-456);
+ property.mutable_int64_values()->Add(0);
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesEmpty) {
+ PropertyProto property;
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string>(property),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(property_util::ExtractPropertyValues<int64_t>(property),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeUnimplemented) {
+ PropertyProto property;
+ EXPECT_THAT(property_util::ExtractPropertyValues<int32_t>(property),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocument) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+
+ // Single string
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/kPropertySingleString),
+ IsOkAndHolds(ElementsAre("single")));
+ // Repeated string
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/kPropertyRepeatedString),
+ IsOkAndHolds(ElementsAre("repeated1", "repeated2", "repeated3")));
+ // Single integer
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, /*property_path=*/kPropertySingleInteger),
+ IsOkAndHolds(ElementsAre(123)));
+ // Repeated integer
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, /*property_path=*/kPropertyRepeatedInteger),
+ IsOkAndHolds(ElementsAre(1, 2, 3)));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentNested) {
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested/1")
+ .SetSchema(std::string(kTypeNestedTest))
+ .AddStringProperty(std::string(kPropertyStr), "a", "b", "c")
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDocument),
+ DocumentBuilder()
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString),
+ "single1")
+ .AddStringProperty(std::string(kPropertyRepeatedString),
+ "repeated1", "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2,
+ 3)
+ .Build(),
+ DocumentBuilder()
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString),
+ "single2")
+ .AddStringProperty(std::string(kPropertyRepeatedString),
+ "repeated4", "repeated5", "repeated6")
+ .AddInt64Property(std::string(kPropertySingleInteger), 456)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 4, 5,
+ 6)
+ .Build())
+ .Build();
+
+  // Since there are 2 nested documents, all of the leaf values will be
+  // returned.
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.singleString"),
+ IsOkAndHolds(ElementsAre("single1", "single2")));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.repeatedString"),
+ IsOkAndHolds(ElementsAre("repeated1", "repeated2", "repeated3",
+ "repeated4", "repeated5", "repeated6")));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ nested_document, /*property_path=*/"nestedDocument.singleInteger"),
+ IsOkAndHolds(ElementsAre(123, 456)));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ nested_document, /*property_path=*/"nestedDocument.repeatedInteger"),
+ IsOkAndHolds(ElementsAre(1, 2, 3, 4, 5, 6)));
+
+ // Test the property at first level
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, kPropertyStr),
+ IsOkAndHolds(ElementsAre("a", "b", "c")));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentNonExistingPaths) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/"invalid"),
+ IsOkAndHolds(IsEmpty()));
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested/1")
+ .SetSchema(std::string(kTypeNestedTest))
+ .AddStringProperty(std::string(kPropertyStr), "a", "b", "c")
+ .AddDocumentProperty(std::string(kPropertyNestedDocument),
+ DocumentProto(document), DocumentProto(document))
+ .Build();
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/kPropertySingleString),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.invalid"),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentTypeUnimplemented) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int32_t>(
+ document, /*property_path=*/kPropertySingleString),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.cc b/icing/schema/schema-property-iterator.cc
new file mode 100644
index 0000000..455b61b
--- /dev/null
+++ b/icing/schema/schema-property-iterator.cc
@@ -0,0 +1,80 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-property-iterator.h"
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status SchemaPropertyIterator::Advance() {
+ while (!levels_.empty()) {
+ if (!levels_.back().Advance()) {
+      // Once we have finished iterating all properties of the current level,
+      // pop it from the stack (levels_), return to the previous level, and
+      // resume the iteration there.
+ parent_type_config_names_.erase(levels_.back().GetSchemaTypeName());
+ levels_.pop_back();
+ continue;
+ }
+
+ const PropertyConfigProto& curr_property_config =
+ levels_.back().GetCurrentPropertyConfig();
+ if (curr_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // We've advanced to a leaf property.
+ return libtextclassifier3::Status::OK;
+ }
+
+    // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
+    //   schema and we need to traverse the next level. Look up the
+    //   SchemaTypeConfig (by the schema name) in type_config_map_, and push a
+    //   new level into levels_.
+    // - Each level has to record the index of the property it is currently
+    //   at, so we can resume the iteration when returning to it. Other
+    //   essential info is maintained in LevelInfo as well.
+ auto nested_type_config_iter =
+ type_config_map_.find(curr_property_config.schema_type());
+ if (nested_type_config_iter == type_config_map_.end()) {
+ // This should never happen because our schema should already be
+ // validated by this point.
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Type config not found: ", curr_property_config.schema_type()));
+ }
+
+ if (parent_type_config_names_.count(
+ nested_type_config_iter->second.schema_type()) > 0) {
+ // Cycle detected. Abort the iteration.
+ return absl_ports::InvalidArgumentError(
+ "Detect nested schema cycle dependency");
+ }
+
+ std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
+ bool is_nested_indexable = levels_.back().GetCurrentNestedIndexable() &&
+ curr_property_config.document_indexing_config()
+ .index_nested_properties();
+ levels_.push_back(LevelInfo(nested_type_config_iter->second,
+ std::move(curr_property_path),
+ is_nested_indexable));
+ parent_type_config_names_.insert(
+ nested_type_config_iter->second.schema_type());
+ }
+ return absl_ports::OutOfRangeError("End of iterator");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.h b/icing/schema/schema-property-iterator.h
new file mode 100644
index 0000000..696dc72
--- /dev/null
+++ b/icing/schema/schema-property-iterator.h
@@ -0,0 +1,160 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
+#define ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-util.h"
+
+namespace icing {
+namespace lib {
+
+// SchemaPropertyIterator: a class for iterating through all properties of a
+// given SchemaTypeConfigProto in lexicographical order. Only leaf
+// (non-document-type) properties will be returned; for document type
+// properties, the iterator traverses down into the nested schema level.
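+//
+// Example usage (illustrative sketch):
+//   SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+//   while (iterator.Advance().ok()) {
+//     std::string property_path = iterator.GetCurrentPropertyPath();
+//     const PropertyConfigProto& property_config =
+//         iterator.GetCurrentPropertyConfig();
+//     // ... process the leaf property ...
+//   }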
+class SchemaPropertyIterator {
+ public:
+ explicit SchemaPropertyIterator(
+ const SchemaTypeConfigProto& base_schema_type_config,
+ const SchemaUtil::TypeConfigMap& type_config_map)
+ : type_config_map_(type_config_map) {
+ levels_.push_back(LevelInfo(base_schema_type_config,
+ /*base_property_path=*/"",
+ /*is_nested_indexable=*/true));
+ parent_type_config_names_.insert(base_schema_type_config.schema_type());
+ }
+
+ // Gets the current property config.
+ //
+  // REQUIRES: The preceding call to Advance() returned OK.
+ const PropertyConfigProto& GetCurrentPropertyConfig() const {
+ return levels_.back().GetCurrentPropertyConfig();
+ }
+
+ // Gets the current property path.
+ //
+  // REQUIRES: The preceding call to Advance() returned OK.
+ std::string GetCurrentPropertyPath() const {
+ return levels_.back().GetCurrentPropertyPath();
+ }
+
+  // Gets whether the current property is nested indexable.
+  //
+  // REQUIRES: The preceding call to Advance() returned OK.
+ bool GetCurrentNestedIndexable() const {
+ return levels_.back().GetCurrentNestedIndexable();
+ }
+
+ // Advances to the next leaf property.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if there is no more leaf property
+ // - INVALID_ARGUMENT_ERROR if cycle dependency is detected in the nested
+ // schema
+ // - NOT_FOUND_ERROR if any nested schema name is not found in
+ // type_config_map
+ libtextclassifier3::Status Advance();
+
+ private:
+  // An inner class for maintaining the iteration state of a (nested) level.
+  // Nested SchemaTypeConfigs form a tree structure, so we have to traverse it
+  // recursively to reach all leaf properties.
+ class LevelInfo {
+ public:
+ explicit LevelInfo(const SchemaTypeConfigProto& schema_type_config,
+ std::string base_property_path, bool is_nested_indexable)
+ : schema_type_config_(schema_type_config),
+ base_property_path_(std::move(base_property_path)),
+ sorted_property_indices_(schema_type_config.properties_size()),
+ current_vec_idx_(-1),
+ is_nested_indexable_(is_nested_indexable) {
+      // Index sort the properties by the lexicographical order of their
+      // names.
+ std::iota(sorted_property_indices_.begin(),
+ sorted_property_indices_.end(),
+ /*value=*/0);
+ std::sort(
+ sorted_property_indices_.begin(), sorted_property_indices_.end(),
+ [&schema_type_config](int lhs_idx, int rhs_idx) -> bool {
+ return schema_type_config.properties(lhs_idx).property_name() <
+ schema_type_config.properties(rhs_idx).property_name();
+ });
+ }
+
+ bool Advance() {
+ return ++current_vec_idx_ < sorted_property_indices_.size();
+ }
+
+ const PropertyConfigProto& GetCurrentPropertyConfig() const {
+ return schema_type_config_.properties(
+ sorted_property_indices_[current_vec_idx_]);
+ }
+
+ std::string GetCurrentPropertyPath() const {
+ return property_util::ConcatenatePropertyPathExpr(
+ base_property_path_, GetCurrentPropertyConfig().property_name());
+ }
+
+ bool GetCurrentNestedIndexable() const { return is_nested_indexable_; }
+
+ std::string_view GetSchemaTypeName() const {
+ return schema_type_config_.schema_type();
+ }
+
+ private:
+ const SchemaTypeConfigProto& schema_type_config_; // Does not own
+
+ // Concatenated property path of all parent levels.
+ std::string base_property_path_;
+
+    // We perform an index sort (comparing property names) in order to iterate
+    // all leaf properties in lexicographical order. This vector stores the
+    // sorted indices.
+ std::vector<int> sorted_property_indices_;
+ int current_vec_idx_;
+
+    // Indicates if the current level is nested indexable. A document type
+    // property has an index_nested_properties flag indicating whether
+    // properties under this level should be indexed or not. If any parent
+    // document type property sets its flag to false, then none of the child
+    // level properties should be indexed.
+ bool is_nested_indexable_;
+ };
+
+ const SchemaUtil::TypeConfigMap& type_config_map_; // Does not own
+
+  // For maintaining the stack of the recursive nested schema type traversal.
+  // We use std::vector instead of std::stack to avoid allocating and freeing
+  // memory too frequently.
+ std::vector<LevelInfo> levels_;
+
+  // Maintains all traversed parent schema type config names of the current
+  // stack (levels_). It is used to detect cycles in nested schema
+  // dependencies.
+ std::unordered_set<std::string_view> parent_type_config_names_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
diff --git a/icing/schema/schema-property-iterator_test.cc b/icing/schema/schema-property-iterator_test.cc
new file mode 100644
index 0000000..e14eabb
--- /dev/null
+++ b/icing/schema/schema-property-iterator_test.cc
@@ -0,0 +1,470 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-property-iterator.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-util.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+TEST(SchemaPropertyIteratorTest,
+ SingleLevelSchemaTypeConfigShouldIterateInCorrectOrder) {
+ std::string schema_type_name = "Schema";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name)
+ .AddProperty(PropertyConfigBuilder().SetName("Google").SetDataType(
+ TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataType(TYPE_INT64))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ NestedSchemaTypeConfigShouldIterateInCorrectOrder) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(PropertyConfigBuilder().SetName("Google").SetDataType(
+ TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataType(TYPE_INT64))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Foo").SetDataType(TYPE_STRING))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello").SetDataType(TYPE_STRING))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("World").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Icing").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // SchemaThree: {
+ // "Hello": TYPE_STRING,
+ // "World": TYPE_DOCUMENT SchemaOne {
+ // "Google": TYPE_STRING,
+ // "Youtube": TYPE_BYTES,
+ // "Alphabet": TYPE_INT64,
+ // },
+ // "Icing": TYPE_DOCUMENT SchemaTwo {
+ // "Foo": TYPE_STRING,
+ // "Bar": TYPE_DOCUMENT SchemaOne {
+ // "Google": TYPE_STRING,
+ // "Youtube": TYPE_BYTES,
+ // "Alphabet": TYPE_INT64,
+ // },
+ // },
+ // }
+ SchemaPropertyIterator iterator(schema_type_config3, type_config_map);
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ NonExistingNestedSchemaTypeConfigShouldGetNotFoundError) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(PropertyConfigBuilder().SetName("Google").SetDataType(
+ TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataType(TYPE_INT64))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+ // Remove the second level (schema_type_config1) from type_config_map.
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name2, schema_type_config2}};
+
+ SchemaPropertyIterator iterator(schema_type_config2, type_config_map);
+ // Since Foo is a document type property with schema type = "SchemaOne" and
+ // "SchemaOne" is not in type_config_map, Advance() should return NOT_FOUND
+ // error.
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ SchemaTypeConfigWithEmptyPropertyShouldGetOutOfRangeErrorAtFirstAdvance) {
+ std::string schema_type_name = "Schema";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder().SetType(schema_type_name).Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ SchemaTypeConfigWithCycleDependencyShouldGetInvalidArgumentError) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2}};
+
+ SchemaPropertyIterator iterator(schema_type_config1, type_config_map);
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ SchemaTypeConfigWithSelfDependencyShouldGetInvalidArgumentError) {
+ std::string schema_type_name = "SchemaOne";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
+ schema_type_name, /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(SchemaPropertyIteratorTest, NestedIndexable) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1,
+ /*index_nested_properties=*/false))
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz1").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz2").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz3").SetDataTypeDocument(
+ schema_type_name3, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz4").SetDataTypeDocument(
+ schema_type_name3, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello1").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello2").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("World").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // SchemaFour: {
+ // "Baz1": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaTwo {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz2": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaTwo {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz3": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaThree {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz4": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaThree {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Hello": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "World": TYPE_STRING INDEXABLE,
+ // }
+ SchemaPropertyIterator iterator(schema_type_config4, type_config_map);
+
+ // Baz1 to Baz4: each goes through two levels of nested document-type
+ // properties.
+ // For Baz1, all levels set index_nested_properties = true, so all leaf
+ // properties should be nested indexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz1.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz1.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ // For Baz2, the parent level sets index_nested_properties = false, so all
+ // leaf properties in child levels should be nested unindexable even if
+ // they've set their index_nested_properties = true.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz2.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz2.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ // For Baz3, the parent level sets index_nested_properties = true, but the
+ // child level sets index_nested_properties = false.
+ // - Leaf properties in the parent level should be nested indexable.
+ // - Leaf properties in the child level should be nested unindexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz3.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz3.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ // For Baz4, all levels set index_nested_properties = false, so all leaf
+ // properties should be nested unindexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz4.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz4.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ // Verify properties with one and zero levels of document-type nesting.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello1.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello2.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config4.properties(6)));
+ EXPECT_THAT(iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
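
The NestedIndexable test above boils down to one propagation rule: a leaf property is nested-indexable only if every document-type property on the path from the top-level type down to that leaf sets index_nested_properties = true. Below is a standalone sketch of that rule, separate from the patch; the names are illustrative and are not Icing APIs.

#include <iostream>
#include <vector>

// Each element is the index_nested_properties flag of one document-type
// property along the path from the top-level schema down to the leaf.
// A single 'false' anywhere on the path cuts off indexing for the leaf.
bool IsNestedIndexable(const std::vector<bool>& flags_along_path) {
  for (bool index_nested_properties : flags_along_path) {
    if (!index_nested_properties) {
      return false;
    }
  }
  return true;
}

int main() {
  // Mirrors "Baz1.Bar.Google": both levels set true -> nested indexable.
  std::cout << IsNestedIndexable({true, true}) << std::endl;   // prints 1
  // Mirrors "Baz3.Bar.Google": Baz3 sets true, Bar sets false -> unindexable.
  std::cout << IsNestedIndexable({true, false}) << std::endl;  // prints 0
  return 0;
}
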
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index fa094b8..79ec49a 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -35,8 +35,8 @@
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/storage.pb.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/dynamic-trie-key-mapper.h"
@@ -149,7 +149,7 @@ SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir,
SchemaStore::~SchemaStore() {
if (has_schema_successfully_set_ && schema_file_ != nullptr &&
- schema_type_mapper_ != nullptr && section_manager_ != nullptr) {
+ schema_type_mapper_ != nullptr && schema_type_manager_ != nullptr) {
if (!PersistToDisk().ok()) {
ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor";
}
@@ -245,8 +245,8 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
type_config_map_.emplace(type_config.schema_type(), type_config);
}
ICING_ASSIGN_OR_RETURN(
- section_manager_,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ schema_type_manager_,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
return libtextclassifier3::Status::OK;
}
@@ -267,8 +267,8 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() {
}
ICING_ASSIGN_OR_RETURN(
- section_manager_,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ schema_type_manager_,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
@@ -366,13 +366,13 @@ SchemaStore::SetSchema(const SchemaProto& new_schema,
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(SchemaProto&& new_schema,
bool ignore_errors_and_delete_documents) {
- ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap new_dependency_map,
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap new_dependent_map,
SchemaUtil::Validate(new_schema));
// TODO(b/256022027): validate and extract joinable properties.
// - Joinable config in non-string properties should be ignored, since
// currently we only support string joining.
- // - If set joinable, the property itself and all of its parent (nested doc)
- // properties should not have REPEATED cardinality.
+ // - If set joinable, the property itself and all of its nested properties
+ // should not have REPEATED cardinality.
SetSchemaResult result;
@@ -401,7 +401,7 @@ SchemaStore::SetSchema(SchemaProto&& new_schema,
// Different schema, track the differences and see if we can still write it
SchemaUtil::SchemaDelta schema_delta =
SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- new_dependency_map);
+ new_dependent_map);
result.schema_types_new_by_name = std::move(schema_delta.schema_types_new);
result.schema_types_changed_fully_compatible_by_name =
@@ -519,33 +519,18 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
return schema_type_mapper_->Get(schema_type);
}
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SchemaStore::GetStringSectionContent(const DocumentProto& document,
- std::string_view section_path) const {
- ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->GetSectionContent<std::string_view>(document,
- section_path);
-}
-
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SchemaStore::GetStringSectionContent(const DocumentProto& document,
- SectionId section_id) const {
- ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->GetSectionContent<std::string_view>(document,
- section_id);
-}
-
libtextclassifier3::StatusOr<const SectionMetadata*>
SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->GetSectionMetadata(schema_type_id, section_id);
+ return schema_type_manager_->section_manager().GetSectionMetadata(
+ schema_type_id, section_id);
}
libtextclassifier3::StatusOr<SectionGroup> SchemaStore::ExtractSections(
const DocumentProto& document) const {
ICING_RETURN_IF_ERROR(CheckSchemaSet());
- return section_manager_->ExtractSections(document);
+ return schema_type_manager_->section_manager().ExtractSections(document);
}
libtextclassifier3::Status SchemaStore::PersistToDisk() {
@@ -571,7 +556,8 @@ SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
int num_types_sections_exhausted = 0;
for (const SchemaTypeConfigProto& type : schema->types()) {
auto sections_list_or =
- section_manager_->GetMetadataList(type.schema_type());
+ schema_type_manager_->section_manager().GetMetadataList(
+ type.schema_type());
if (!sections_list_or.ok()) {
continue;
}
@@ -589,7 +575,7 @@ SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
SchemaStore::GetSectionMetadata(const std::string& schema_type) const {
- return section_manager_->GetMetadataList(schema_type);
+ return schema_type_manager_->section_manager().GetMetadataList(schema_type);
}
libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index fdf6312..8b85fc8 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -31,8 +31,8 @@
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/storage.pb.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -192,30 +192,6 @@ class SchemaStore {
libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
std::string_view schema_type) const;
- // Finds content of a section by section path (e.g. property1.property2)
- //
- // Returns:
- // A string of content on success
- // FAILED_PRECONDITION if schema hasn't been set yet
- // NOT_FOUND if:
- // 1. Property is optional and not found in the document
- // 2. section_path is invalid
- // 3. Content is empty
- libtextclassifier3::StatusOr<std::vector<std::string_view>>
- GetStringSectionContent(const DocumentProto& document,
- std::string_view section_path) const;
-
- // Finds content of a section by id
- //
- // Returns:
- // A string of content on success
- // FAILED_PRECONDITION if schema hasn't been set yet
- // INVALID_ARGUMENT if section id is invalid
- // NOT_FOUND if type config name of document not found
- libtextclassifier3::StatusOr<std::vector<std::string_view>>
- GetStringSectionContent(const DocumentProto& document,
- SectionId section_id) const;
-
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
@@ -392,8 +368,9 @@ class SchemaStore {
// Maps schema types to a densely-assigned unique id.
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
- // Manager of indexed section related metadata.
- std::unique_ptr<const SectionManager> section_manager_;
+ // Manager of section (indexable property) and joinable property metadata
+ // for all schema types.
+ std::unique_ptr<const SchemaTypeManager> schema_type_manager_;
};
} // namespace lib
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index da04931..749fcaa 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -327,10 +327,6 @@ TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
prop->set_name("name");
prop->add_string_values("foo bar baz");
- EXPECT_THAT(store->GetStringSectionContent(doc, /*section_id=*/0),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(store->GetStringSectionContent(doc, "name"),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(store->ExtractSections(doc),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
diff --git a/icing/schema/schema-type-manager.cc b/icing/schema/schema-type-manager.cc
new file mode 100644
index 0000000..7882db5
--- /dev/null
+++ b/icing/schema/schema-type-manager.cc
@@ -0,0 +1,78 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-type-manager.h"
+
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/schema/schema-property-iterator.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<SchemaTypeManager>>
+SchemaTypeManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
+ const KeyMapper<SchemaTypeId>* schema_type_mapper) {
+ ICING_RETURN_ERROR_IF_NULL(schema_type_mapper);
+
+ SectionManager::Builder section_manager_builder(*schema_type_mapper);
+ JoinablePropertyManager::Builder joinable_property_manager_builder(
+ *schema_type_mapper);
+
+ for (const auto& [type_config_name, type_config] : type_config_map) {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper->Get(type_config_name));
+
+ // Use iterator to traverse all leaf properties of the schema.
+ SchemaPropertyIterator iterator(type_config, type_config_map);
+ while (true) {
+ libtextclassifier3::Status status = iterator.Advance();
+ if (!status.ok()) {
+ if (absl_ports::IsOutOfRange(status)) {
+ break;
+ }
+ return status;
+ }
+
+ // Process section (indexable property)
+ if (iterator.GetCurrentNestedIndexable()) {
+ ICING_RETURN_IF_ERROR(
+ section_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, iterator.GetCurrentPropertyConfig(),
+ iterator.GetCurrentPropertyPath()));
+ }
+
+ // Process joinable property
+ ICING_RETURN_IF_ERROR(
+ joinable_property_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, iterator.GetCurrentPropertyConfig(),
+ iterator.GetCurrentPropertyPath()));
+ }
+ }
+
+ return std::unique_ptr<SchemaTypeManager>(new SchemaTypeManager(
+ std::move(section_manager_builder).Build(),
+ std::move(joinable_property_manager_builder).Build()));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-type-manager.h b/icing/schema/schema-type-manager.h
new file mode 100644
index 0000000..dc5f799
--- /dev/null
+++ b/icing/schema/schema-type-manager.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
+#define ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
+
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// This class is a wrapper around SectionManager and JoinablePropertyManager.
+class SchemaTypeManager {
+ public:
+ // Factory function to create a SchemaTypeManager. It does not take
+ // ownership of any input components; all pointers must refer to valid
+ // objects that outlive the created SchemaTypeManager instance.
+ //
+ // Returns:
+ // - A SchemaTypeManager on success
+ // - FAILED_PRECONDITION_ERROR on any null pointer input
+ // - OUT_OF_RANGE_ERROR if the number of indexable or joinable properties in
+ // a single Schema exceeds the thresholds (kTotalNumSections,
+ // kTotalNumJoinableProperties)
+ // - INVALID_ARGUMENT_ERROR if type_config_map contains inconsistent
+ // information (e.g. invalid schema type id, cyclic dependency between
+ // nested schema types)
+ // - NOT_FOUND_ERROR if any nested schema name is not found in
+ // type_config_map
+ static libtextclassifier3::StatusOr<std::unique_ptr<SchemaTypeManager>>
+ Create(const SchemaUtil::TypeConfigMap& type_config_map,
+ const KeyMapper<SchemaTypeId>* schema_type_mapper);
+
+ const SectionManager& section_manager() const { return *section_manager_; }
+
+ const JoinablePropertyManager& joinable_property_manager() const {
+ return *joinable_property_manager_;
+ }
+
+ private:
+ explicit SchemaTypeManager(
+ std::unique_ptr<SectionManager> section_manager,
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager)
+ : section_manager_(std::move(section_manager)),
+ joinable_property_manager_(std::move(joinable_property_manager)) {}
+
+ std::unique_ptr<SectionManager> section_manager_;
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
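
For orientation, here is a minimal sketch of the call pattern this header introduces, assuming the Icing source tree and a hypothetical type_config_map that already contains a single type named "Email". It mirrors the setup in schema-type-manager_test.cc below; the helper name BuildSchemaTypeManagerSketch and the "Email" type are illustrative only, and error handling is reduced to the ICING_* macros used elsewhere in this change.

#include <memory>
#include <string>
#include <vector>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/file/filesystem.h"
#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/key-mapper.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

// Sketch: build a SchemaTypeManager for a type_config_map that holds one
// hypothetical "Email" type, then read back its section metadata.
libtextclassifier3::Status BuildSchemaTypeManagerSketch(
    const Filesystem& filesystem, const std::string& base_dir,
    const SchemaUtil::TypeConfigMap& type_config_map) {
  ICING_ASSIGN_OR_RETURN(
      std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
      DynamicTrieKeyMapper<SchemaTypeId>::Create(
          filesystem, base_dir + "/schema_type_mapper",
          /*maximum_size_bytes=*/3 * 128 * 1024));
  // Every type in type_config_map needs an id assigned before Create().
  ICING_RETURN_IF_ERROR(schema_type_mapper->Put("Email", 0));

  ICING_ASSIGN_OR_RETURN(
      std::unique_ptr<SchemaTypeManager> schema_type_manager,
      SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));

  // Section and joinable-property metadata are now available per type.
  ICING_ASSIGN_OR_RETURN(
      const std::vector<SectionMetadata>* metadata_list,
      schema_type_manager->section_manager().GetMetadataList("Email"));
  (void)metadata_list;  // One entry per indexable property of "Email".
  return libtextclassifier3::Status::OK;
}

}  // namespace lib
}  // namespace icing
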
diff --git a/icing/schema/schema-type-manager_test.cc b/icing/schema/schema-type-manager_test.cc
new file mode 100644
index 0000000..93cbdee
--- /dev/null
+++ b/icing/schema/schema-type-manager_test.cc
@@ -0,0 +1,352 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-type-manager.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Pointee;
+
+// type and property names of EmailMessage
+static constexpr char kTypeEmail[] = "EmailMessage";
+// indexable (in lexicographical order)
+static constexpr char kPropertyRecipientIds[] = "recipientIds";
+static constexpr char kPropertyRecipients[] = "recipients";
+static constexpr char kPropertySenderQualifiedId[] =
+ "senderQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertySubject[] = "subject";
+static constexpr char kPropertyTimestamp[] = "timestamp";
+// non-indexable
+static constexpr char kPropertyAttachment[] = "attachment";
+static constexpr char kPropertyNonIndexableInteger[] = "nonIndexableInteger";
+static constexpr char kPropertyTagQualifiedId[] =
+ "tagQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertyText[] = "text";
+
+// type and property names of Conversation
+static constexpr char kTypeConversation[] = "Conversation";
+// indexable (in lexicographical order)
+static constexpr char kPropertyEmails[] = "emails";
+static constexpr char kPropertyGroupQualifiedId[] =
+ "groupQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertyName[] = "name";
+// non-indexable
+static constexpr char kPropertyNestedNonIndexable[] = "nestedNonIndexable";
+static constexpr char kPropertySuperTagQualifiedId[] =
+ "superTagQualifiedId"; // QUALIFIED_ID joinable
+
+PropertyConfigProto CreateReceipientIdsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipientIds)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateRecipientsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateSenderQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySenderQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateSubjectPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTimestampPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTagQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTagQualifiedId)
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateSuperTagQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySuperTagQualifiedId)
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(CreateTagQualifiedIdPropertyConfig())
+ .AddProperty(CreateSubjectPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(CreateSenderQualifiedIdPropertyConfig())
+ .AddProperty(CreateRecipientsPropertyConfig())
+ .AddProperty(CreateReceipientIdsPropertyConfig())
+ .AddProperty(CreateTimestampPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNonIndexableInteger)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(CreateSuperTagQualifiedIdPropertyConfig())
+ .AddProperty(CreateNamePropertyConfig())
+ .AddProperty(CreateGroupQualifiedIdPropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNestedNonIndexable)
+ .SetDataTypeDocument(kTypeEmail,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
+class SchemaTypeManagerTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(SchemaTypeManagerTest, Create) {
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(kTypeEmail, CreateEmailTypeConfig());
+ type_config_map.emplace(kTypeConversation, CreateConversationTypeConfig());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kTypeConversation, 1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Check SectionManager
+ // In the Email type, "recipientIds", "recipients", "senderQualifiedId",
+ // "subject" and "timestamp" are indexable properties. "attachment",
+ // "nonIndexableInteger", "tagQualifiedId" and "text" are non-indexable.
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetMetadataList(kTypeEmail),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"recipientIds",
+ CreateReceipientIdsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"subject",
+ CreateSubjectPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/4,
+ /*expected_property_path=*/"timestamp",
+ CreateTimestampPropertyConfig())))));
+ // In the Conversation type, "groupQualifiedId" and "name" are indexable
+ // properties, as are the indexable properties of the nested Email under
+ // "emails". None of the Email properties under "nestedNonIndexable" are
+ // indexable.
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetMetadataList(kTypeConversation),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.recipientIds",
+ CreateReceipientIdsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()),
+ EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/4,
+ /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/5,
+ /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/6,
+ /*expected_property_path=*/"name",
+ CreateNamePropertyConfig())))));
+
+ // Check JoinablePropertyManager
+ // In the Email type, "senderQualifiedId" and "tagQualifiedId" are joinable
+ // properties.
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager().GetMetadataList(
+ kTypeEmail),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig())))));
+ // In the Conversation type, "groupQualifiedId" and "superTagQualifiedId" are
+ // joinable properties, as are the joinable properties of the nested Email
+ // under both the "emails" and "nestedNonIndexable" properties.
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager().GetMetadataList(
+ kTypeConversation),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"nestedNonIndexable.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/4,
+ /*expected_property_path=*/"nestedNonIndexable.tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/5,
+ /*expected_property_path=*/"superTagQualifiedId",
+ CreateSuperTagQualifiedIdPropertyConfig())))));
+}
+
+TEST_F(SchemaTypeManagerTest, CreateWithNullPointerShouldFail) {
+ SchemaUtil::TypeConfigMap type_config_map;
+ EXPECT_THAT(SchemaTypeManager::Create(type_config_map,
+ /*schema_type_mapper=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(SchemaTypeManagerTest, CreateWithSchemaNotInSchemaTypeMapperShouldFail) {
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("type");
+
+ auto property = type_config.add_properties();
+ property->set_property_name("property");
+ property->set_data_type(TYPE_STRING);
+ property->set_cardinality(CARDINALITY_REQUIRED);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type", type_config);
+
+ // Create an empty schema type mapper
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+
+ EXPECT_THAT(
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index ccad84f..0589ada 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -51,6 +51,23 @@ bool ArePropertiesEqual(const PropertyConfigProto& old_property,
new_property.document_indexing_config().index_nested_properties();
}
+bool IsIndexedProperty(const PropertyConfigProto& property_config) {
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING:
+ return property_config.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN;
+ case PropertyConfigProto::DataType::INT64:
+ return property_config.integer_indexing_config().numeric_match_type() !=
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN;
+ case PropertyConfigProto::DataType::UNKNOWN:
+ case PropertyConfigProto::DataType::DOUBLE:
+ case PropertyConfigProto::DataType::BOOLEAN:
+ case PropertyConfigProto::DataType::BYTES:
+ case PropertyConfigProto::DataType::DOCUMENT:
+ return false;
+ }
+}
+
bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
const PropertyConfigProto& new_property) {
if (old_property.cardinality() < new_property.cardinality()) {
@@ -107,28 +124,34 @@ bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
}
+bool IsIntegerNumericMatchTypeCompatible(
+ const IntegerIndexingConfig& old_indexed,
+ const IntegerIndexingConfig& new_indexed) {
+ return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
+}
+
void AddIncompatibleChangeToDelta(
std::unordered_set<std::string>& incompatible_delta,
const SchemaTypeConfigProto& old_type_config,
- const SchemaUtil::DependencyMap& new_schema_dependency_map,
+ const SchemaUtil::DependentMap& new_schema_dependent_map,
const SchemaUtil::TypeConfigMap& old_type_config_map,
const SchemaUtil::TypeConfigMap& new_type_config_map) {
// If this type is incompatible, then every type that depends on it might
- // also be incompatible. Use the dependency map to mark those ones as
+ // also be incompatible. Use the dependent map to mark those ones as
// incompatible too.
incompatible_delta.insert(old_type_config.schema_type());
- auto parent_types_itr =
- new_schema_dependency_map.find(old_type_config.schema_type());
- if (parent_types_itr != new_schema_dependency_map.end()) {
- for (std::string_view parent_type : parent_types_itr->second) {
+ auto dependent_types_itr =
+ new_schema_dependent_map.find(old_type_config.schema_type());
+ if (dependent_types_itr != new_schema_dependent_map.end()) {
+ for (std::string_view dependent_type : dependent_types_itr->second) {
// The types from new_schema that depend on the current
// old_type_config may not present in old_schema.
// Those types will be listed at schema_delta.schema_types_new
// instead.
- std::string parent_type_str(parent_type);
- if (old_type_config_map.find(parent_type_str) !=
+ std::string dependent_type_str(dependent_type);
+ if (old_type_config_map.find(dependent_type_str) !=
old_type_config_map.end()) {
- incompatible_delta.insert(std::move(parent_type_str));
+ incompatible_delta.insert(std::move(dependent_type_str));
}
}
}
@@ -136,31 +159,30 @@ void AddIncompatibleChangeToDelta(
} // namespace
-libtextclassifier3::Status ExpandTranstiveDependencies(
- const SchemaUtil::DependencyMap& child_to_direct_parent_map,
- std::string_view type,
- SchemaUtil::DependencyMap* expanded_child_to_parent_map,
+libtextclassifier3::Status ExpandTranstiveDependents(
+ const SchemaUtil::DependentMap& dependent_map, std::string_view type,
+ SchemaUtil::DependentMap* expanded_dependent_map,
std::unordered_set<std::string_view>* pending_expansions,
std::unordered_set<std::string_view>* orphaned_types) {
- auto expanded_itr = expanded_child_to_parent_map->find(type);
- if (expanded_itr != expanded_child_to_parent_map->end()) {
+ auto expanded_itr = expanded_dependent_map->find(type);
+ if (expanded_itr != expanded_dependent_map->end()) {
// We've already expanded this type. Just return.
return libtextclassifier3::Status::OK;
}
- auto itr = child_to_direct_parent_map.find(type);
- if (itr == child_to_direct_parent_map.end()) {
+ auto itr = dependent_map.find(type);
+ if (itr == dependent_map.end()) {
// It's an orphan. Just return.
orphaned_types->insert(type);
return libtextclassifier3::Status::OK;
}
pending_expansions->insert(type);
- std::unordered_set<std::string_view> expanded_dependencies;
+ std::unordered_set<std::string_view> expanded_dependents;
- // Add all of the direct parent dependencies.
- expanded_dependencies.reserve(itr->second.size());
- expanded_dependencies.insert(itr->second.begin(), itr->second.end());
+ // Add all of the direct dependents.
+ expanded_dependents.reserve(itr->second.size());
+ expanded_dependents.insert(itr->second.begin(), itr->second.end());
- // Iterate through each direct parent and add their indirect parents.
+ // Iterate through each direct dependent and add their indirect dependents.
for (std::string_view dep : itr->second) {
// 1. Check if we're in the middle of expanding this type - IOW there's a
// cycle!
@@ -171,84 +193,77 @@ libtextclassifier3::Status ExpandTranstiveDependencies(
}
// 2. Expand this type as needed.
- ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies(
- child_to_direct_parent_map, dep, expanded_child_to_parent_map,
- pending_expansions, orphaned_types));
+ ICING_RETURN_IF_ERROR(
+ ExpandTranstiveDependents(dependent_map, dep, expanded_dependent_map,
+ pending_expansions, orphaned_types));
if (orphaned_types->count(dep) > 0) {
// Dep is an orphan. Just skip to the next dep.
continue;
}
- // 3. Dep has been fully expanded. Add all of its dependencies to this
- // type's dependencies.
- auto dep_expanded_itr = expanded_child_to_parent_map->find(dep);
- expanded_dependencies.reserve(expanded_dependencies.size() +
- dep_expanded_itr->second.size());
- expanded_dependencies.insert(dep_expanded_itr->second.begin(),
- dep_expanded_itr->second.end());
+ // 3. Dep has been fully expanded. Add all of its dependents to this
+ // type's dependents.
+ auto dep_expanded_itr = expanded_dependent_map->find(dep);
+ expanded_dependents.reserve(expanded_dependents.size() +
+ dep_expanded_itr->second.size());
+ expanded_dependents.insert(dep_expanded_itr->second.begin(),
+ dep_expanded_itr->second.end());
}
- expanded_child_to_parent_map->insert(
- {type, std::move(expanded_dependencies)});
+ expanded_dependent_map->insert({type, std::move(expanded_dependents)});
pending_expansions->erase(type);
return libtextclassifier3::Status::OK;
}
-// Expands the dependencies represented by the child_to_direct_parent_map to
-// also include indirect parents.
+// Calculates and returns the transitive closure of dependent_map, i.e. expands
+// the map so that it also includes indirect dependents.
//
-// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
-// type B and B has a property of type C. C and D only have non-document
-// properties.
+// Ex. Suppose we have a schema with three types A, B and C, and we have the
+// following dependent relationship.
//
-// The child to direct parent dependency map for this schema would be:
-// C -> B
-// B -> A
+// C -> B (B depends on C)
+// B -> A (A depends on B)
//
-// This function would expand it so that A is also present as an indirect parent
-// of C.
-libtextclassifier3::StatusOr<SchemaUtil::DependencyMap>
-ExpandTranstiveDependencies(
- const SchemaUtil::DependencyMap& child_to_direct_parent_map) {
- SchemaUtil::DependencyMap expanded_child_to_parent_map;
+// Then this function would expand the map by adding C -> A.
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
+ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) {
+ SchemaUtil::DependentMap expanded_dependent_map;
// Types that we are expanding.
std::unordered_set<std::string_view> pending_expansions;
- // Types that have no parents that depend on them.
+ // Types that have no dependents.
std::unordered_set<std::string_view> orphaned_types;
- for (const auto& kvp : child_to_direct_parent_map) {
- ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies(
- child_to_direct_parent_map, kvp.first, &expanded_child_to_parent_map,
- &pending_expansions, &orphaned_types));
+ for (const auto& kvp : dependent_map) {
+ ICING_RETURN_IF_ERROR(ExpandTranstiveDependents(
+ dependent_map, kvp.first, &expanded_dependent_map, &pending_expansions,
+ &orphaned_types));
}
- return expanded_child_to_parent_map;
+ return expanded_dependent_map;
}
-// Builds a transitive child-parent dependency map. 'Orphaned' types (types with
-// no parents) will not be present in the map.
+// Builds a transitive dependent map. 'Orphaned' types (types with no
+// dependents) will not be present in the map.
//
// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
// type B and B has a property of type C. C and D only have non-document
// properties.
//
-// The transitive child-parent dependency map for this schema would be:
-// C -> A, B
-// B -> A
+// The transitive dependent map for this schema would be:
+// C -> A, B (both A and B depend on C)
+// B -> A (A depends on B)
//
// A and D would be considered orphaned properties because no type refers to
// them.
//
// RETURNS:
-// On success, a transitive child-parent dependency map of all types in the
-// schema.
+// On success, a transitive dependent map of all types in the schema.
// INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
// ALREADY_EXISTS if a schema type is specified more than once in the schema
-libtextclassifier3::StatusOr<SchemaUtil::DependencyMap>
-BuildTransitiveDependencyGraph(const SchemaProto& schema) {
- // Child to parent map.
- SchemaUtil::DependencyMap child_to_direct_parent_map;
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
+BuildTransitiveDependentGraph(const SchemaProto& schema) {
+ SchemaUtil::DependentMap dependent_map;
- // Add all first-order dependencies.
+ // Add all first-order dependents.
std::unordered_set<std::string_view> known_types;
std::unordered_set<std::string_view> unknown_types;
for (const auto& type_config : schema.types()) {
@@ -265,21 +280,10 @@ BuildTransitiveDependencyGraph(const SchemaProto& schema) {
// Need to know what schema_type these Document properties should be
// validated against
std::string_view property_schema_type(property_config.schema_type());
- if (property_schema_type == schema_type) {
- return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Infinite loop detected in type configs. '",
- schema_type, "' references itself."));
- }
if (known_types.count(property_schema_type) == 0) {
unknown_types.insert(property_schema_type);
}
- auto itr = child_to_direct_parent_map.find(property_schema_type);
- if (itr == child_to_direct_parent_map.end()) {
- child_to_direct_parent_map.insert(
- {property_schema_type, std::unordered_set<std::string_view>()});
- itr = child_to_direct_parent_map.find(property_schema_type);
- }
- itr->second.insert(schema_type);
+ dependent_map[property_schema_type].insert(schema_type);
}
}
}
@@ -287,15 +291,15 @@ BuildTransitiveDependencyGraph(const SchemaProto& schema) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
}
- return ExpandTranstiveDependencies(child_to_direct_parent_map);
+ return ExpandTranstiveDependents(dependent_map);
}
-libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate(
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
const SchemaProto& schema) {
- // 1. Build the dependency map. This will detect any cycles, non-existent or
+ // 1. Build the dependent map. This will detect any cycles, non-existent or
// duplicate types in the schema.
- ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap dependency_map,
- BuildTransitiveDependencyGraph(schema));
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap dependent_map,
+ BuildTransitiveDependentGraph(schema));
// Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
// already.
@@ -350,7 +354,7 @@ libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate(
}
}
- return dependency_map;
+ return dependent_map;
}
libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
@@ -460,8 +464,7 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
// A non-default term_match_type indicates that this property is meant to be
// indexed.
- if (property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
+ if (IsIndexedProperty(property_config)) {
++parsed_property_configs.num_indexed_properties;
}
@@ -478,7 +481,7 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
const SchemaProto& old_schema, const SchemaProto& new_schema,
- const DependencyMap& new_schema_dependency_map) {
+ const DependentMap& new_schema_dependent_map) {
SchemaDelta schema_delta;
TypeConfigMap old_type_config_map, new_type_config_map;
@@ -526,9 +529,7 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// A non-default term_match_type indicates that this property is meant to
// be indexed.
- bool is_indexed_property =
- old_property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN;
+ bool is_indexed_property = IsIndexedProperty(old_property_config);
if (is_indexed_property) {
++old_indexed_properties;
}
@@ -576,6 +577,9 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
if (!IsTermMatchTypeCompatible(
old_property_config.string_indexing_config(),
new_property_config->string_indexing_config()) ||
+ !IsIntegerNumericMatchTypeCompatible(
+ old_property_config.integer_indexing_config(),
+ new_property_config->integer_indexing_config()) ||
old_property_config.document_indexing_config()
.index_nested_properties() !=
new_property_config->document_indexing_config()
@@ -627,19 +631,19 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
if (is_incompatible) {
AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
- old_type_config, new_schema_dependency_map,
+ old_type_config, new_schema_dependent_map,
old_type_config_map, new_type_config_map);
}
if (is_index_incompatible) {
AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
- old_type_config, new_schema_dependency_map,
+ old_type_config, new_schema_dependent_map,
old_type_config_map, new_type_config_map);
}
if (is_join_incompatible) {
AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
- old_type_config, new_schema_dependency_map,
+ old_type_config, new_schema_dependent_map,
old_type_config_map, new_type_config_map);
}
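
The dependent-map expansion documented in the comments above is a transitive-closure computation over a string-keyed adjacency map. Below is a standalone sketch of the same idea, separate from the patch; it omits the cycle detection and the string_view-based bookkeeping that ExpandTranstiveDependents performs, and the names are illustrative only.

#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

using DependentMap =
    std::unordered_map<std::string, std::unordered_set<std::string>>;

// Depth-first expansion: the dependents of `type` are its direct dependents
// plus, recursively, the dependents of those dependents.
void Expand(const DependentMap& direct, const std::string& type,
            DependentMap& expanded) {
  if (expanded.count(type) > 0) return;  // Already expanded.
  auto itr = direct.find(type);
  if (itr == direct.end()) return;  // Orphan: nothing depends on it.
  std::unordered_set<std::string> all = itr->second;
  for (const std::string& dep : itr->second) {
    Expand(direct, dep, expanded);
    auto dep_itr = expanded.find(dep);
    if (dep_itr != expanded.end()) {
      all.insert(dep_itr->second.begin(), dep_itr->second.end());
    }
  }
  expanded[type] = std::move(all);
}

int main() {
  // C -> B (B depends on C), B -> A (A depends on B), as in the example above.
  DependentMap direct = {{"C", {"B"}}, {"B", {"A"}}};
  DependentMap expanded;
  for (const auto& entry : direct) Expand(direct, entry.first, expanded);
  // Expansion adds the indirect dependent: C -> {A, B}.
  for (const std::string& d : expanded["C"]) std::cout << d << " ";
  std::cout << std::endl;
  return 0;
}
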
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index ed46621..e5747bb 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -33,12 +33,10 @@ class SchemaUtil {
using TypeConfigMap =
std::unordered_map<std::string, const SchemaTypeConfigProto>;
- // Maps from a child type to the parent types that depend on it.
- // Ex. type A has a single property of type B
- // The dependency map will be { { "B", { "A" } } }
- using DependencyMap =
- std::unordered_map<std::string_view,
- std::unordered_set<std::string_view>>;
+ // If A -> B is indicated in the map, then type A must be built before
+ // building type B, i.e. B depends on A.
+ using DependentMap = std::unordered_map<std::string_view,
+ std::unordered_set<std::string_view>>;
struct SchemaDelta {
// Which schema types were present in the old schema, but were deleted from
@@ -120,11 +118,11 @@ class SchemaUtil {
// document properties can be opted out of indexing.
//
// Returns:
- // On success, a dependency map from each child types to all parent types
+ // On success, a dependent map from each type to all the types
// that depend on it directly or indirectly.
// ALREADY_EXISTS for case 1 and 2
// INVALID_ARGUMENT for 3-13
- static libtextclassifier3::StatusOr<DependencyMap> Validate(
+ static libtextclassifier3::StatusOr<DependentMap> Validate(
const SchemaProto& schema);
// Creates a mapping of schema type -> schema type config proto. The
@@ -175,7 +173,7 @@ class SchemaUtil {
// Returns a SchemaDelta that captures the aforementioned differences.
static const SchemaDelta ComputeCompatibilityDelta(
const SchemaProto& old_schema, const SchemaProto& new_schema,
- const DependencyMap& new_schema_dependency_map);
+ const DependentMap& new_schema_dependent_map);
// Validates the 'property_name' field.
// 1. Can't be an empty string
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index 9dbc972..44d8def 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -39,8 +39,8 @@ constexpr char kEmailType[] = "EmailMessage";
constexpr char kMessageType[] = "Text";
constexpr char kPersonType[] = "Person";
-TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
- // Create a schema with the following dependencies:
+TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
+ // Create a schema with the following dependent relation:
// C
// / \
// A - B E - F
@@ -106,7 +106,7 @@ TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
.Build();
- // Provide these in alphabetical (also parent-child) order: A, B, C, D, E, F
+ // Provide these in alphabetical order: A, B, C, D, E, F
SchemaProto schema = SchemaBuilder()
.AddType(type_a)
.AddType(type_b)
@@ -115,7 +115,7 @@ TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
.AddType(type_e)
.AddType(type_f)
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
SchemaUtil::Validate(schema));
EXPECT_THAT(d_map, testing::SizeIs(5));
EXPECT_THAT(d_map["F"],
@@ -126,8 +126,8 @@ TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
}
-TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
- // Create a schema with the following dependencies:
+TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
+ // Create a schema with the following dependent relation:
// C
// / \
// A - B E - F
@@ -193,7 +193,7 @@ TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
.Build();
- // Provide these in reverse alphabetical (also child-parent) order:
+ // Provide these in reverse alphabetical order:
// F, E, D, C, B, A
SchemaProto schema = SchemaBuilder()
.AddType(type_f)
@@ -203,7 +203,7 @@ TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
.AddType(type_b)
.AddType(type_a)
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
SchemaUtil::Validate(schema));
EXPECT_THAT(d_map, testing::SizeIs(5));
EXPECT_THAT(d_map["F"],
@@ -214,8 +214,8 @@ TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
}
-TEST(SchemaUtilTest, DependencyGraphMixedOrder) {
- // Create a schema with the following dependencies:
+TEST(SchemaUtilTest, DependentGraphMixedOrder) {
+ // Create a schema with the following dependent relation:
// C
// / \
// A - B E - F
@@ -290,7 +290,7 @@ TEST(SchemaUtilTest, DependencyGraphMixedOrder) {
.AddType(type_b)
.AddType(type_d)
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
SchemaUtil::Validate(schema));
EXPECT_THAT(d_map, testing::SizeIs(5));
EXPECT_THAT(d_map["F"],
@@ -302,7 +302,7 @@ TEST(SchemaUtilTest, DependencyGraphMixedOrder) {
}
TEST(SchemaUtilTest, TopLevelCycle) {
- // Create a schema with the following dependencies:
+ // Create a schema with the following dependent relation:
// A - B - B - B - B....
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
@@ -330,7 +330,7 @@ TEST(SchemaUtilTest, TopLevelCycle) {
}
TEST(SchemaUtilTest, MultiLevelCycle) {
- // Create a schema with the following dependencies:
+ // Create a schema with the following dependent relation:
// A - B - C - A - B - C - A ...
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
@@ -367,7 +367,7 @@ TEST(SchemaUtilTest, MultiLevelCycle) {
}
TEST(SchemaUtilTest, NonExistentType) {
- // Create a schema with the following dependencies:
+ // Create a schema with the following dependent relation:
// A - B - C - X (does not exist)
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
@@ -683,9 +683,9 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_changed_fully_compatible.insert(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema_with_optional, no_dependencies_map),
+ old_schema, new_schema_with_optional, no_dependents_map),
Eq(schema_delta));
}
@@ -719,9 +719,9 @@ TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema_with_required, no_dependencies_map),
+ old_schema, new_schema_with_required, no_dependents_map),
Eq(schema_delta));
}
@@ -755,9 +755,9 @@ TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
@@ -787,10 +787,10 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// We can't have a new schema be more restrictive, REPEATED->OPTIONAL
SchemaUtil::SchemaDelta incompatible_schema_delta;
incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/less_restrictive_schema,
- /*new_schema=*/more_restrictive_schema, no_dependencies_map),
+ /*new_schema=*/more_restrictive_schema, no_dependents_map),
Eq(incompatible_schema_delta));
// We can have the new schema be less restrictive, OPTIONAL->REPEATED;
@@ -799,7 +799,7 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
kEmailType);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/more_restrictive_schema,
- /*new_schema=*/less_restrictive_schema, no_dependencies_map),
+ /*new_schema=*/less_restrictive_schema, no_dependents_map),
Eq(compatible_schema_delta));
}
@@ -828,9 +828,9 @@ TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
@@ -888,16 +888,16 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
// kEmailType depends on kMessageType
- SchemaUtil::DependencyMap dependencies_map = {{kMessageType, {kEmailType}}};
+ SchemaUtil::DependentMap dependents_map = {{kMessageType, {kEmailType}}};
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema, dependencies_map);
+ old_schema, new_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
EXPECT_THAT(actual.schema_types_incompatible,
testing::ElementsAre(kEmailType));
EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty());
}
-TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
+TEST(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
// Configure old schema
SchemaProto schema_with_indexed_property =
SchemaBuilder()
@@ -925,21 +925,21 @@ TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.insert(kPersonType);
- // New schema gained a new indexed property.
- SchemaUtil::DependencyMap no_dependencies_map;
+ // New schema gained a new indexed string property.
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
schema_with_unindexed_property, schema_with_indexed_property,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
- // New schema lost an indexed property.
+ // New schema lost an indexed string property.
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
schema_with_indexed_property, schema_with_unindexed_property,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
-TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
+TEST(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -971,12 +971,160 @@ TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.insert(kPersonType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
+TEST(SchemaUtilTest,
+ AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_index_incompatible,
+ IsEmpty());
+}
+
+TEST(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) {
+ // Configure old schema
+ SchemaProto schema_with_indexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto schema_with_unindexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+
+ // New schema gained a new indexed integer property.
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_unindexed_property, schema_with_indexed_property,
+ no_dependents_map),
+ Eq(schema_delta));
+
+ // New schema lost an indexed integer property.
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_indexed_property, schema_with_unindexed_property,
+ no_dependents_map),
+ Eq(schema_delta));
+}
+
+TEST(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewIndexedProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
+ Eq(schema_delta));
+}
+
+TEST(SchemaUtilTest,
+ AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_index_incompatible,
+ IsEmpty());
+}
+
TEST(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
// Configure old schema
SchemaProto schema_with_joinable_property =
@@ -1006,16 +1154,16 @@ TEST(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
expected_schema_delta.schema_types_join_incompatible.insert(kPersonType);
// New schema gained a new joinable property.
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
schema_with_non_joinable_property,
- schema_with_joinable_property, no_dependencies_map),
+ schema_with_joinable_property, no_dependents_map),
Eq(expected_schema_delta));
// New schema lost a joinable property.
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
schema_with_joinable_property,
- schema_with_non_joinable_property, no_dependencies_map),
+ schema_with_non_joinable_property, no_dependents_map),
Eq(expected_schema_delta));
}
@@ -1051,9 +1199,9 @@ TEST(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) {
SchemaUtil::SchemaDelta expected_schema_delta;
expected_schema_delta.schema_types_join_incompatible.insert(kPersonType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(expected_schema_delta));
}
@@ -1087,9 +1235,9 @@ TEST(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map)
+ no_dependents_map)
.schema_types_join_incompatible,
IsEmpty());
}
@@ -1128,9 +1276,9 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_new.insert(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
@@ -1169,9 +1317,9 @@ TEST(SchemaUtilTest, DeletingTypeIsNoted) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_deleted.emplace(kPersonType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
- no_dependencies_map),
+ no_dependents_map),
Eq(schema_delta));
}
@@ -1207,9 +1355,9 @@ TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
schema_delta.schema_types_index_incompatible.emplace(kEmailType);
- SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::DependentMap no_dependents_map;
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
- old_schema, new_schema, no_dependencies_map);
+ old_schema, new_schema, no_dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
}
@@ -1255,16 +1403,16 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
// unaffected.
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_index_incompatible.emplace(kPersonType);
- SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}};
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {kPersonType}}};
SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
- no_nested_index_schema, nested_index_schema, dependencies_map);
+ no_nested_index_schema, nested_index_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
// Going from index_nested_properties=true to index_nested_properties=false
// should also make kPersonType index_incompatible. kEmailType should be
// unaffected.
actual = SchemaUtil::ComputeCompatibilityDelta(
- nested_index_schema, no_nested_index_schema, dependencies_map);
+ nested_index_schema, no_nested_index_schema, dependents_map);
EXPECT_THAT(actual, Eq(schema_delta));
}
@@ -1321,19 +1469,19 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("ChildSchema"))
+ .AddType(SchemaTypeConfigBuilder().SetType("InnerSchema"))
.AddType(SchemaTypeConfigBuilder()
- .SetType("ParentSchema")
+ .SetType("OuterSchema")
.AddProperty(PropertyConfigBuilder()
- .SetName("ChildProperty1")
+ .SetName("InnerProperty1")
.SetDataTypeDocument(
- "ChildSchema",
+ "InnerSchema",
/*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED))
.AddProperty(PropertyConfigBuilder()
- .SetName("ChildProperty2")
+ .SetName("InnerProperty2")
.SetDataTypeDocument(
- "ChildSchema",
+ "InnerSchema",
/*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
@@ -1385,7 +1533,7 @@ TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
}
TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
- // Create a schema for the parent schema
+ // Create a schema for the outer schema
SchemaProto schema =
SchemaBuilder()
.AddType(
@@ -1397,7 +1545,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
.SetDataTypeDocument(
"B", /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_OPTIONAL)))
- // Create the child schema
+ // Create the inner schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("B")
@@ -1419,7 +1567,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
SchemaProto schema =
SchemaBuilder()
- // Create a schema for the parent schema
+ // Create a schema for the outer schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("A")
@@ -1429,7 +1577,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
.SetDataTypeDocument(
"B", /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_OPTIONAL)))
- // Create the child schema
+ // Create the inner schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("B")
@@ -1439,7 +1587,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
.SetDataTypeDocument(
"C", /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED)))
- // Create the child schema
+ // Create the inner schema
.AddType(
SchemaTypeConfigBuilder()
.SetType("C")
diff --git a/icing/schema/section-manager-builder_test.cc b/icing/schema/section-manager-builder_test.cc
new file mode 100644
index 0000000..ef4b077
--- /dev/null
+++ b/icing/schema/section-manager-builder_test.cc
@@ -0,0 +1,329 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+
+class SectionManagerBuilderTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(SectionManagerBuilderTest, Build) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeTwo", 1));
+
+ PropertyConfigProto prop_foo =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_bar =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ PropertyConfigProto prop_baz =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_bar, /*property_path=*/"bar"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/1, prop_baz, /*property_path=*/"baz"));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ // Check "typeOne"
+ EXPECT_THAT(
+ section_manager->GetMetadataList("typeOne"),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"foo", prop_foo),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"bar", prop_bar)))));
+ // Check "typeTwo"
+ EXPECT_THAT(section_manager->GetMetadataList("typeTwo"),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"baz", prop_baz)))));
+}
+
+TEST_F(SectionManagerBuilderTest, TooManyPropertiesShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("type", 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ // Add kTotalNumSections indexable properties
+ for (int i = 0; i < kTotalNumSections; i++) {
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"property" + std::to_string(i)));
+ }
+
+ // Add another indexable property. This should fail.
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ EXPECT_THAT(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"propertyExceed"),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+}
+
+TEST_F(SectionManagerBuilderTest, InvalidSchemaTypeIdShouldFail) {
+  // Process a property config with an invalid (negative) schema type id.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("type", 0));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/-1, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerBuilderTest,
+ SchemaTypeIdInconsistentWithSchemaTypeMapperSizeShouldFail) {
+  // Create a schema type mapper of size 2, but assign its second entry
+  // schema type id = 2 instead of 1.
+  // Since SectionManager::Builder expects the 2 schema type ids to be [0, 1],
+  // processing a property with schema type id = 2 should fail even though
+  // id = 2 is present in the schema type mapper.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeTwo", 2));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/2, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+class IndexableSectionManagerBuilderTest
+ : public SectionManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(IndexableSectionManagerBuilderTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ EXPECT_THAT(section_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following types are considered indexable:
+// - String with valid TermMatchType and TokenizerType
+// - Int64 with valid NumericMatchType
+INSTANTIATE_TEST_SUITE_P(
+ IndexableSectionManagerBuilderTest, IndexableSectionManagerBuilderTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+class NonIndexableSectionManagerBuilderTest
+ : public SectionManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(NonIndexableSectionManagerBuilderTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ EXPECT_THAT(section_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+// The following types are considered non-indexable:
+// - String with TERM_MATCH_UNKNOWN, TOKENIZER_NONE
+// - Int64 with NUMERIC_MATCH_UNKNOWN
+// - Double
+// - Boolean
+// - Bytes
+// - Document
+INSTANTIATE_TEST_SUITE_P(
+ NonIndexableSectionManagerBuilderTest,
+ NonIndexableSectionManagerBuilderTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
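The tests in this new file exercise the intended construction flow for the builder: the key mapper's ids must densely cover [0, num_keys() - 1], every property of every type is pushed through ProcessSchemaTypePropertyConfig, and the manager is then move-built. A minimal usage sketch under those assumptions; the mapper contents and property config variables here are illustrative, not taken from the change:

  // Sketch: schema_type_mapper is assumed to already map "Email" -> 0, and the
  // property configs are assumed to come from an already-validated schema.
  SectionManager::Builder builder(*schema_type_mapper);
  ICING_RETURN_IF_ERROR(builder.ProcessSchemaTypePropertyConfig(
      /*schema_type_id=*/0, subject_property_config,
      /*property_path=*/"subject"));
  ICING_RETURN_IF_ERROR(builder.ProcessSchemaTypePropertyConfig(
      /*schema_type_id=*/0, timestamp_property_config,
      /*property_path=*/"timestamp"));
  // Build() is rvalue-qualified, so the builder is consumed with std::move.
  std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();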
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 2ca534e..c3cd1cd 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -30,12 +30,11 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
-#include "icing/schema/schema-util.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -43,19 +42,8 @@
namespace icing {
namespace lib {
-namespace {
-
-using TypeSectionMap =
- std::unordered_map<std::string, const std::vector<SectionMetadata>>;
-// Helper function to concatenate a path and a property name
-std::string ConcatenatePath(const std::string& path,
- const std::string& next_property_name) {
- if (path.empty()) {
- return next_property_name;
- }
- return absl_ports::StrCat(path, kPropertySeparator, next_property_name);
-}
+namespace {
// Helper function to append a new section metadata
libtextclassifier3::Status AppendNewSectionMetadata(
@@ -83,265 +71,77 @@ libtextclassifier3::Status AppendNewSectionMetadata(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status AssignSections(
- const SchemaTypeConfigProto& current_type_config,
- const std::string& current_section_path,
- const SchemaUtil::TypeConfigMap& type_config_map,
- std::vector<SectionMetadata>* metadata_list) {
- // Sorts properties by name's alphabetical order so that order doesn't affect
- // section assigning.
- auto sorted_properties = current_type_config.properties();
- std::sort(sorted_properties.pointer_begin(), sorted_properties.pointer_end(),
- [](const PropertyConfigProto* p1, const PropertyConfigProto* p2) {
- return p1->property_name() < p2->property_name();
- });
- for (const auto& property_config : sorted_properties) {
- // Creates section metadata according to data type
- switch (property_config.data_type()) {
- case PropertyConfigProto::DataType::DOCUMENT: {
- auto nested_type_config_iter =
- type_config_map.find(property_config.schema_type());
- if (nested_type_config_iter == type_config_map.end()) {
- // This should never happen because our schema should already be
- // validated by this point.
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Type config not found: ", property_config.schema_type()));
- }
-
- if (property_config.document_indexing_config()
- .index_nested_properties()) {
- // Assign any indexed sections recursively
- const SchemaTypeConfigProto& nested_type_config =
- nested_type_config_iter->second;
- ICING_RETURN_IF_ERROR(
- AssignSections(nested_type_config,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- type_config_map, metadata_list));
- }
- break;
- }
- case PropertyConfigProto::DataType::STRING: {
- if (property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- ICING_RETURN_IF_ERROR(AppendNewSectionMetadata(
- metadata_list,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- PropertyConfigProto::DataType::STRING,
- property_config.string_indexing_config().tokenizer_type(),
- property_config.string_indexing_config().term_match_type(),
- IntegerIndexingConfig::NumericMatchType::UNKNOWN));
- }
- break;
- }
- case PropertyConfigProto::DataType::INT64: {
- if (property_config.integer_indexing_config().numeric_match_type() !=
- IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
- ICING_RETURN_IF_ERROR(AppendNewSectionMetadata(
- metadata_list,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- PropertyConfigProto::DataType::INT64,
- StringIndexingConfig::TokenizerType::NONE, TermMatchType::UNKNOWN,
- property_config.integer_indexing_config().numeric_match_type()));
- }
- break;
- }
- default: {
- // Skip other data types.
- break;
- }
- }
- }
- return libtextclassifier3::Status::OK;
-}
-
-// Builds a vector of vectors that holds SectionMetadatas for all the schema
-// types. The outer vector's index corresponds with a type's SchemaTypeId. The
-// inner vector's index corresponds to the section's SectionId.
-libtextclassifier3::StatusOr<std::vector<std::vector<SectionMetadata>>>
-BuildSectionMetadataCache(const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>& schema_type_mapper) {
- // Create our vector and reserve the number of schema types we have
- std::vector<std::vector<SectionMetadata>> section_metadata_cache(
- schema_type_mapper.num_keys());
-
- for (const auto& name_and_type : type_config_map) {
- // Assigns sections for each type config
- const std::string& type_config_name = name_and_type.first;
- const SchemaTypeConfigProto& type_config = name_and_type.second;
- std::vector<SectionMetadata> metadata_list;
- ICING_RETURN_IF_ERROR(AssignSections(type_config,
- /*current_section_path*/ "",
- type_config_map, &metadata_list));
-
- // Insert the section metadata list at the index of the type's SchemaTypeId
- ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
- schema_type_mapper.Get(type_config_name));
- section_metadata_cache[schema_type_id] = std::move(metadata_list);
- }
- return section_metadata_cache;
-}
-
-// Helper function to get content from a property according to the template type
-// T. We only care about STRING and INT64, which are the only 2 indexable data
-// types.
-template <typename T>
-libtextclassifier3::StatusOr<std::vector<T>> GetPropertyContent(
- const PropertyProto& property) {
- return absl_ports::UnimplementedError(
- "Unimplemented template type for GetPropertyContent");
-}
-
-template <>
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-GetPropertyContent<std::string_view>(const PropertyProto& property) {
- return std::vector<std::string_view>(property.string_values().begin(),
- property.string_values().end());
-}
-
-template <>
-libtextclassifier3::StatusOr<std::vector<int64_t>> GetPropertyContent<int64_t>(
- const PropertyProto& property) {
- return std::vector<int64_t>(property.int64_values().begin(),
- property.int64_values().end());
-}
-
template <typename T>
void AppendSection(
SectionMetadata section_metadata,
libtextclassifier3::StatusOr<std::vector<T>>&& section_content_or,
std::vector<Section<T>>& sections_out) {
- if (section_content_or.ok()) {
+ if (!section_content_or.ok()) {
+ return;
+ }
+
+ std::vector<T> section_content = std::move(section_content_or).ValueOrDie();
+ if (!section_content.empty()) {
// Adds to result vector if section is found in document
sections_out.emplace_back(std::move(section_metadata),
- std::move(section_content_or).ValueOrDie());
+ std::move(section_content));
}
}
} // namespace
-SectionManager::SectionManager(
- const KeyMapper<SchemaTypeId>* schema_type_mapper,
- std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
- : schema_type_mapper_(*schema_type_mapper),
- section_metadata_cache_(std::move(section_metadata_cache)) {}
-
-libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>>
-SectionManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>* schema_type_mapper) {
- ICING_RETURN_ERROR_IF_NULL(schema_type_mapper);
-
- ICING_ASSIGN_OR_RETURN(
- std::vector<std::vector<SectionMetadata>> section_metadata_cache,
- BuildSectionMetadataCache(type_config_map, *schema_type_mapper));
- return std::unique_ptr<SectionManager>(new SectionManager(
- schema_type_mapper, std::move(section_metadata_cache)));
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<std::vector<T>> SectionManager::GetSectionContent(
- const DocumentProto& document, std::string_view section_path) const {
- // Finds the first property name in section_path
- size_t separator_position = section_path.find(kPropertySeparator);
- std::string_view current_property_name =
- (separator_position == std::string::npos)
- ? section_path
- : section_path.substr(0, separator_position);
-
- // Tries to match the property name with the ones in document
- auto property_iterator =
- std::find_if(document.properties().begin(), document.properties().end(),
- [current_property_name](const PropertyProto& property) {
- return property.name() == current_property_name;
- });
-
- if (property_iterator == document.properties().end()) {
- // Property name not found, it could be one of the following 2 cases:
- // 1. The property is optional and it's not in the document
- // 2. The property name is invalid
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Section path '", section_path, "' not found in document."));
+libtextclassifier3::Status
+SectionManager::Builder::ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path) {
+ if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
}
- if (separator_position == std::string::npos) {
- // Current property name is the last one in section path
- ICING_ASSIGN_OR_RETURN(std::vector<T> content,
- GetPropertyContent<T>(*property_iterator));
- if (content.empty()) {
- // The content of property is explicitly set to empty, we'll treat it as
- // NOT_FOUND because the index doesn't care about empty contents.
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Section path '", section_path, "' content was empty"));
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (property_config.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN) {
+ ICING_RETURN_IF_ERROR(AppendNewSectionMetadata(
+ &section_metadata_cache_[schema_type_id], std::move(property_path),
+ PropertyConfigProto::DataType::STRING,
+ property_config.string_indexing_config().tokenizer_type(),
+ property_config.string_indexing_config().term_match_type(),
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN));
+ }
+ break;
}
- return content;
- }
-
- // Gets section content recursively
- std::string_view sub_section_path =
- section_path.substr(separator_position + 1);
- std::vector<T> nested_document_content;
- for (const auto& nested_document : property_iterator->document_values()) {
- auto content_or = GetSectionContent<T>(nested_document, sub_section_path);
- if (content_or.ok()) {
- std::vector<T> content = std::move(content_or).ValueOrDie();
- std::move(content.begin(), content.end(),
- std::back_inserter(nested_document_content));
+ case PropertyConfigProto::DataType::INT64: {
+ if (property_config.integer_indexing_config().numeric_match_type() !=
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
+ ICING_RETURN_IF_ERROR(AppendNewSectionMetadata(
+ &section_metadata_cache_[schema_type_id], std::move(property_path),
+ PropertyConfigProto::DataType::INT64,
+ StringIndexingConfig::TokenizerType::NONE, TermMatchType::UNKNOWN,
+ property_config.integer_indexing_config().numeric_match_type()));
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
}
}
- if (nested_document_content.empty()) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
- }
- return nested_document_content;
-}
-
-// Explicit template instantiation
-template libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SectionManager::GetSectionContent<std::string_view>(
- const DocumentProto& document, std::string_view section_path) const;
-template libtextclassifier3::StatusOr<std::vector<int64_t>>
-SectionManager::GetSectionContent<int64_t>(const DocumentProto& document,
- std::string_view section_path) const;
-
-template <typename T>
-libtextclassifier3::StatusOr<std::vector<T>> SectionManager::GetSectionContent(
- const DocumentProto& document, SectionId section_id) const {
- if (!IsSectionIdValid(section_id)) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Section id %d is greater than the max value %d", section_id,
- kMaxSectionId));
- }
- ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
- GetMetadataList(document.schema()));
- if (section_id >= metadata_list->size()) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Section with id %d doesn't exist in type config %s", section_id,
- document.schema().c_str()));
- }
- // The index of metadata list is the same as the section id, so we can use
- // section id as the index.
- return GetSectionContent<T>(document, metadata_list->at(section_id).path);
+ return libtextclassifier3::Status::OK;
}
-// Explicit template instantiation
-template libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SectionManager::GetSectionContent<std::string_view>(
- const DocumentProto& document, SectionId section_id) const;
-template libtextclassifier3::StatusOr<std::vector<int64_t>>
-SectionManager::GetSectionContent<int64_t>(const DocumentProto& document,
- SectionId section_id) const;
-
libtextclassifier3::StatusOr<const SectionMetadata*>
SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
+ if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
if (!IsSectionIdValid(section_id)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Section id %d is greater than the max value %d", section_id,
kMaxSectionId));
}
+
const std::vector<SectionMetadata>& section_metadatas =
section_metadata_cache_[schema_type_id];
if (section_id >= section_metadatas.size()) {
@@ -363,17 +163,18 @@ libtextclassifier3::StatusOr<SectionGroup> SectionManager::ExtractSections(
for (const SectionMetadata& section_metadata : *metadata_list) {
switch (section_metadata.data_type) {
case PropertyConfigProto::DataType::STRING: {
- AppendSection(section_metadata,
- GetSectionContent<std::string_view>(
- document, section_metadata.path),
- section_group.string_sections);
+ AppendSection(
+ section_metadata,
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, section_metadata.path),
+ section_group.string_sections);
break;
}
case PropertyConfigProto::DataType::INT64: {
- AppendSection(
- section_metadata,
- GetSectionContent<int64_t>(document, section_metadata.path),
- section_group.integer_sections);
+ AppendSection(section_metadata,
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, section_metadata.path),
+ section_group.integer_sections);
break;
}
default: {
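One nuance of the rewritten AppendSection above: extraction failures are still skipped, and an explicitly empty value list is now filtered here rather than being reported as NOT_FOUND by the extraction helper; either way, a missing or empty property simply produces no Section. A short sketch of consuming ExtractSections under that model; the document variable is an assumption, and the comments describe the expected contents rather than exact member names:

  ICING_ASSIGN_OR_RETURN(SectionGroup section_group,
                         section_manager->ExtractSections(email_document));
  for (const Section<std::string_view>& section : section_group.string_sections) {
    // Each entry pairs a SectionMetadata (path, id, term match type) with the
    // non-empty string values extracted for that property path.
  }
  for (const Section<int64_t>& section : section_group.integer_sections) {
    // Likewise for indexed int64 properties such as a range-indexed timestamp.
  }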
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
index 78a5acb..6241dc0 100644
--- a/icing/schema/section-manager.h
+++ b/icing/schema/section-manager.h
@@ -22,7 +22,6 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
-#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -30,62 +29,55 @@
namespace icing {
namespace lib {
-inline constexpr std::string_view kPropertySeparator = ".";
-inline constexpr std::string_view kLBracket = "[";
-inline constexpr std::string_view kRBracket = "]";
-
// This class provides section-related operations. It assigns sections according
// to type configs and extracts section / sections from documents.
+// The actual instance is created together with JoinablePropertyManager and both
+// of them are wrapped into SchemaTypeManager.
+//
+// Note: SectionManager assumes schema type ids are consecutive integers
+// starting from 0, so it maintains a vector with size
+// schema_type_mapper_->num_keys() that maps schema type id to a list (2nd level
+// vector) of SectionMetadatas. Therefore, all schema type ids stored in
+// schema_type_mapper_ must be in range [0, schema_type_mapper_->num_keys() - 1]
+// and unique.
class SectionManager {
public:
+ // Builder class to create a SectionManager which does not take ownership of
+ // any input components, and all pointers must refer to valid objects that
+ // outlive the created SectionManager instance.
+ class Builder {
+ public:
+ explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
+ : schema_type_mapper_(schema_type_mapper),
+ section_metadata_cache_(schema_type_mapper.num_keys()) {}
+
+ // Checks and appends a new SectionMetadata for the schema type id if the
+ // given property config is indexable.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
+ // schema_type_mapper_.num_keys() - 1])
+ // - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema
+ // exceeds the threshold (kTotalNumSections)
+ libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path);
+
+ // Builds and returns a SectionManager instance.
+ std::unique_ptr<SectionManager> Build() && {
+ return std::unique_ptr<SectionManager>(new SectionManager(
+ schema_type_mapper_, std::move(section_metadata_cache_)));
+ }
+
+ private:
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own.
+ std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
+ };
+
SectionManager(const SectionManager&) = delete;
SectionManager& operator=(const SectionManager&) = delete;
- // Factory function to create a SectionManager which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created SectionManager instance.
- //
- // Returns:
- // A SectionManager on success
- // FAILED_PRECONDITION on any null pointer input
- // INVALID_ARGUMENT if infinite loop detected in the type configs
- // OUT_OF_RANGE if number of properties need indexing exceeds the max number
- // NOT_FOUND if any type config name not found in the map
- static libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>> Create(
- const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>* schema_type_mapper);
-
- // Finds contents of a section by section path (e.g. property1.property2)
- // according to the template type T.
- //
- // Types of supported T:
- // - std::string, std::string_view: return property.string_values()
- // - int64_t : return property.int64_values()
- //
- // Returns:
- // A vector of contents with the specified type on success
- // NOT_FOUND if:
- // 1. Property is optional and not found in the document
- // 2. section_path is invalid
- // 3. Content is empty (could be caused by incorrect type T)
- template <typename T>
- libtextclassifier3::StatusOr<std::vector<T>> GetSectionContent(
- const DocumentProto& document, std::string_view section_path) const;
-
- // Finds contents of a section by id according to the template type T.
- //
- // Types of supported T:
- // - std::string, std::string_view: return property.string_values()
- // - int64_t : return property.int64_values()
- //
- // Returns:
- // A vector of contents on success
- // INVALID_ARGUMENT if section id is invalid
- // NOT_FOUND if type config name of document not found
- template <typename T>
- libtextclassifier3::StatusOr<std::vector<T>> GetSectionContent(
- const DocumentProto& document, SectionId section_id) const;
-
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
@@ -103,24 +95,26 @@ class SectionManager {
//
// Returns:
// A SectionGroup instance on success
- // NOT_FOUND if type config name of document not found
+ // NOT_FOUND if the type config name of document is not present in
+ // schema_type_mapper_
libtextclassifier3::StatusOr<SectionGroup> ExtractSections(
const DocumentProto& document) const;
// Returns:
// - On success, the section metadatas for the specified type
- // - NOT_FOUND if the type config name is not present in the schema
+ // - NOT_FOUND if the type config name is not present in schema_type_mapper_
libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
GetMetadataList(const std::string& type_config_name) const;
private:
- // Use SectionManager::Create() to instantiate
explicit SectionManager(
- const KeyMapper<SchemaTypeId>* schema_type_mapper,
- std::vector<std::vector<SectionMetadata>>&& section_metadata_cache);
+ const KeyMapper<SchemaTypeId>& schema_type_mapper,
+ std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
+ : schema_type_mapper_(schema_type_mapper),
+ section_metadata_cache_(std::move(section_metadata_cache)) {}
// Maps schema types to a densely-assigned unique id.
- const KeyMapper<SchemaTypeId>& schema_type_mapper_;
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own
// The index of section_metadata_cache_ corresponds to a schema type's
// SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The
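The builder's documented invariant above, that schema type ids are consecutive and start at 0, is what allows section_metadata_cache_ to be indexed directly by SchemaTypeId. A hedged sketch of the assignment loop a caller (such as the SchemaTypeManager mentioned in the class comment) would need in order to uphold it; the loop shape and variable names are assumptions:

  // Sketch: assign ids 0..n-1 in a single pass so the builder's vector
  // indexing by SchemaTypeId stays valid.
  SchemaTypeId next_id = 0;
  for (const SchemaTypeConfigProto& type_config : schema.types()) {
    ICING_RETURN_IF_ERROR(
        schema_type_mapper->Put(type_config.schema_type(), next_id++));
  }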
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 4e8fbbd..db2be6b 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -15,6 +15,9 @@
#include "icing/schema/section-manager.h"
#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -24,6 +27,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/key-mapper.h"
@@ -36,134 +40,158 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::Pointee;
using ::testing::SizeIs;
-// type and property names of EmailMessage
-constexpr char kTypeEmail[] = "EmailMessage";
-constexpr char kPropertySubject[] = "subject";
-constexpr char kPropertyText[] = "text";
-constexpr char kPropertyAttachment[] = "attachment";
-constexpr char kPropertyRecipients[] = "recipients";
-constexpr char kPropertyRecipientIds[] = "recipientIds";
-constexpr char kPropertyTimestamp[] = "timestamp";
-constexpr char kPropertyNonIndexableInteger[] = "non_indexable_integer";
+// type and property names of Email
+static constexpr std::string_view kTypeEmail = "Email";
+// indexable
+static constexpr std::string_view kPropertyRecipientIds = "recipientIds";
+static constexpr std::string_view kPropertyRecipients = "recipients";
+static constexpr std::string_view kPropertySubject = "subject";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+// non-indexable
+static constexpr std::string_view kPropertyAttachment = "attachment";
+static constexpr std::string_view kPropertyNonIndexableInteger =
+ "nonIndexableInteger";
+static constexpr std::string_view kPropertyText = "text";
+
// type and property names of Conversation
-constexpr char kTypeConversation[] = "Conversation";
-constexpr char kPropertyName[] = "name";
-constexpr char kPropertyEmails[] = "emails";
+static constexpr std::string_view kTypeConversation = "Conversation";
+// indexable
+static constexpr std::string_view kPropertyEmails = "emails";
+static constexpr std::string_view kPropertyName = "name";
constexpr int64_t kDefaultTimestamp = 1663274901;
+PropertyConfigProto CreateRecipientIdsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipientIds)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateRecipientsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateSubjectPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTimestampPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(CreateSubjectPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(CreateRecipientsPropertyConfig())
+ .AddProperty(CreateRecipientIdsPropertyConfig())
+ .AddProperty(CreateTimestampPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNonIndexableInteger)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(CreateNamePropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
class SectionManagerTest : public ::testing::Test {
protected:
- SectionManagerTest() : test_dir_(GetTestTempDir() + "/icing") {
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+
auto email_type = CreateEmailTypeConfig();
auto conversation_type = CreateConversationTypeConfig();
type_config_map_.emplace(email_type.schema_type(), email_type);
type_config_map_.emplace(conversation_type.schema_type(),
conversation_type);
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+ // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+
email_document_ =
DocumentBuilder()
.SetKey("icing", "email/1")
- .SetSchema(kTypeEmail)
- .AddStringProperty(kPropertySubject, "the subject")
- .AddStringProperty(kPropertyText, "the text")
- .AddBytesProperty(kPropertyAttachment, "attachment bytes")
- .AddStringProperty(kPropertyRecipients, "recipient1", "recipient2",
- "recipient3")
- .AddInt64Property(kPropertyRecipientIds, 1, 2, 3)
- .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
- .AddInt64Property(kPropertyNonIndexableInteger, 100)
+ .SetSchema(std::string(kTypeEmail))
+ .AddStringProperty(std::string(kPropertySubject), "the subject")
+ .AddStringProperty(std::string(kPropertyText), "the text")
+ .AddBytesProperty(std::string(kPropertyAttachment),
+ "attachment bytes")
+ .AddStringProperty(std::string(kPropertyRecipients), "recipient1",
+ "recipient2", "recipient3")
+ .AddInt64Property(std::string(kPropertyRecipientIds), 1, 2, 3)
+ .AddInt64Property(std::string(kPropertyTimestamp),
+ kDefaultTimestamp)
+ .AddInt64Property(std::string(kPropertyNonIndexableInteger), 100)
.Build();
conversation_document_ =
DocumentBuilder()
.SetKey("icing", "conversation/1")
- .SetSchema(kTypeConversation)
- .AddDocumentProperty(kPropertyEmails,
+ .SetSchema(std::string(kTypeConversation))
+ .AddDocumentProperty(std::string(kPropertyEmails),
DocumentProto(email_document_),
DocumentProto(email_document_))
.Build();
}
- void SetUp() override {
- // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
- // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB
- int key_mapper_size = 3 * 128 * 1024;
- ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
- DynamicTrieKeyMapper<SchemaTypeId>::Create(
- filesystem_, test_dir_, key_mapper_size));
- ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
- ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
- }
-
- static SchemaTypeConfigProto CreateEmailTypeConfig() {
- SchemaTypeConfigProto type =
- SchemaTypeConfigBuilder()
- .SetType(kTypeEmail)
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(kPropertySubject)
- .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(kPropertyText)
- .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyAttachment)
- .SetDataType(TYPE_BYTES)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName(kPropertyRecipients)
- .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REPEATED))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyRecipientIds)
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_REPEATED))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyTimestamp)
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName(kPropertyNonIndexableInteger)
- .SetDataType(TYPE_INT64)
- .SetCardinality(CARDINALITY_REQUIRED))
- .Build();
- return type;
- }
-
- static SchemaTypeConfigProto CreateConversationTypeConfig() {
- SchemaTypeConfigProto type;
- type.set_schema_type(kTypeConversation);
-
- auto name = type.add_properties();
- name->set_property_name(kPropertyName);
- name->set_data_type(TYPE_STRING);
- name->set_cardinality(CARDINALITY_OPTIONAL);
- name->mutable_string_indexing_config()->set_term_match_type(
- TERM_MATCH_EXACT);
-
- auto emails = type.add_properties();
- emails->set_property_name(kPropertyEmails);
- emails->set_data_type(TYPE_DOCUMENT);
- emails->set_cardinality(CARDINALITY_REPEATED);
- emails->set_schema_type(kTypeEmail);
- emails->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- return type;
+ void TearDown() override {
+ schema_type_mapper_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
Filesystem filesystem_;
- const std::string test_dir_;
+ std::string test_dir_;
SchemaUtil::TypeConfigMap type_config_map_;
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
@@ -171,244 +199,31 @@ class SectionManagerTest : public ::testing::Test {
DocumentProto conversation_document_;
};
-TEST_F(SectionManagerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- SectionManager::Create(type_config_map_, /*schema_type_mapper=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
-
-TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- // Adds more properties than allowed
- int max_num_sections_allowed = kMaxSectionId - kMinSectionId + 1;
- for (int i = 0; i < max_num_sections_allowed + 1; i++) {
- auto property = type_config.add_properties();
- property->set_property_name("property" + std::to_string(i));
- property->set_data_type(TYPE_STRING);
- property->set_cardinality(CARDINALITY_REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TERM_MATCH_EXACT);
- }
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
- HasSubstr("Too many properties")));
-}
-
-TEST_F(SectionManagerTest, GetSectionContent) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test simple section paths
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"subject"),
- IsOkAndHolds(ElementsAre("the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"text"),
- IsOkAndHolds(ElementsAre("the text")));
- EXPECT_THAT(
- section_manager->GetSectionContent<int64_t>(email_document_,
- /*section_path=*/"timestamp"),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp)));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentRepeatedValues) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test repeated values
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"recipients"),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- email_document_,
- /*section_path=*/"recipientIds"),
- IsOkAndHolds(ElementsAre(1, 2, 3)));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentConcatenatedSectionPaths) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test concatenated section paths: "property1.property2"
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.subject"),
- IsOkAndHolds(ElementsAre("the subject", "the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.text"),
- IsOkAndHolds(ElementsAre("the text", "the text")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_,
- /*section_path=*/"emails.timestamp"),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp, kDefaultTimestamp)));
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.recipients"),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_,
- /*section_path=*/"emails.recipientIds"),
- IsOkAndHolds(ElementsAre(1, 2, 3, 1, 2, 3)));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentNonExistingPaths) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test non-existing paths
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"name"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"invalid"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_,
- /*section_path=*/"emails.invalid"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentNonIndexableTypes) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test other data types
- // BYTES type can't be indexed, so content won't be returned
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"attachment"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(SectionManagerTest, GetSectionContentMismatchedType) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Use the wrong template type to get the indexable content. GetSectionContent
- // should get empty content from the corresponding proto (repeated) field and
- // return NOT_FOUND error.
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_,
- /*section_path=*/"recipientIds"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- email_document_,
- /*section_path=*/"recipients"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-// The following tests are similar to the ones above but use section ids
-// instead of section paths
-TEST_F(SectionManagerTest, GetSectionContentBySectionId) {
- ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // EmailMessage (section id -> section path):
- SectionId recipient_ids_section_id = 0;
- SectionId recipients_section_id = 1;
- SectionId subject_section_id = 2;
- SectionId timestamp_section_id = 3;
- SectionId invalid_email_section_id = 4;
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- email_document_, recipient_ids_section_id),
- IsOkAndHolds(ElementsAre(1, 2, 3)));
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- email_document_, recipients_section_id),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_, subject_section_id),
- IsOkAndHolds(ElementsAre("the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(email_document_,
- timestamp_section_id),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp)));
-
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- email_document_, invalid_email_section_id),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Conversation (section id -> section path):
- // 0 -> emails.recipientIds
- // 1 -> emails.recipients
- // 2 -> emails.subject
- // 3 -> emails.timestamp
- // 4 -> name
- SectionId emails_recipient_ids_section_id = 0;
- SectionId emails_recipients_section_id = 1;
- SectionId emails_subject_section_id = 2;
- SectionId emails_timestamp_section_id = 3;
- SectionId name_section_id = 4;
- SectionId invalid_conversation_section_id = 5;
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_, emails_recipient_ids_section_id),
- IsOkAndHolds(ElementsAre(1, 2, 3, 1, 2, 3)));
- EXPECT_THAT(
- section_manager->GetSectionContent<std::string_view>(
- conversation_document_, emails_recipients_section_id),
- IsOkAndHolds(ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3")));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_, emails_subject_section_id),
- IsOkAndHolds(ElementsAre("the subject", "the subject")));
- EXPECT_THAT(section_manager->GetSectionContent<int64_t>(
- conversation_document_, emails_timestamp_section_id),
- IsOkAndHolds(ElementsAre(kDefaultTimestamp, kDefaultTimestamp)));
-
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_, name_section_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent<std::string_view>(
- conversation_document_, invalid_conversation_section_id),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
TEST_F(SectionManagerTest, ExtractSections) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
- // Extracts all sections from 'EmailMessage' document
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(email_document_));
+ // Extracts all sections from 'Email' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(email_document_));
// String sections
EXPECT_THAT(section_group.string_sections, SizeIs(2));
EXPECT_THAT(section_group.string_sections[0].metadata,
- Eq(SectionMetadata(
- /*id_in=*/1, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"recipients")));
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()));
EXPECT_THAT(section_group.string_sections[0].content,
ElementsAre("recipient1", "recipient2", "recipient3"));
EXPECT_THAT(section_group.string_sections[1].metadata,
- Eq(SectionMetadata(
- /*id_in=*/2, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"subject")));
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"subject",
+ CreateSubjectPropertyConfig()));
EXPECT_THAT(section_group.string_sections[1].content,
ElementsAre("the subject"));
@@ -416,67 +231,176 @@ TEST_F(SectionManagerTest, ExtractSections) {
EXPECT_THAT(section_group.integer_sections, SizeIs(2));
EXPECT_THAT(section_group.integer_sections[0].metadata,
- Eq(SectionMetadata(/*id_in=*/0, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"recipientIds")));
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[0].content, ElementsAre(1, 2, 3));
EXPECT_THAT(section_group.integer_sections[1].metadata,
- Eq(SectionMetadata(/*id_in=*/3, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"timestamp")));
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"timestamp",
+ CreateTimestampPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[1].content,
ElementsAre(kDefaultTimestamp));
}
TEST_F(SectionManagerTest, ExtractSectionsNested) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
// Extracts all sections from 'Conversation' document
ICING_ASSERT_OK_AND_ASSIGN(
SectionGroup section_group,
- section_manager->ExtractSections(conversation_document_));
+ schema_type_manager->section_manager().ExtractSections(
+ conversation_document_));
// String sections
EXPECT_THAT(section_group.string_sections, SizeIs(2));
- EXPECT_THAT(section_group.string_sections[0].metadata,
- Eq(SectionMetadata(
- /*id_in=*/1, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"emails.recipients")));
+ EXPECT_THAT(
+ section_group.string_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()));
EXPECT_THAT(section_group.string_sections[0].content,
ElementsAre("recipient1", "recipient2", "recipient3",
"recipient1", "recipient2", "recipient3"));
EXPECT_THAT(section_group.string_sections[1].metadata,
- Eq(SectionMetadata(
- /*id_in=*/2, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
- NUMERIC_MATCH_UNKNOWN,
- /*path_in=*/"emails.subject")));
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()));
EXPECT_THAT(section_group.string_sections[1].content,
ElementsAre("the subject", "the subject"));
// Integer sections
EXPECT_THAT(section_group.integer_sections, SizeIs(2));
- EXPECT_THAT(section_group.integer_sections[0].metadata,
- Eq(SectionMetadata(/*id_in=*/0, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"emails.recipientIds")));
+ EXPECT_THAT(
+ section_group.integer_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[0].content,
ElementsAre(1, 2, 3, 1, 2, 3));
- EXPECT_THAT(section_group.integer_sections[1].metadata,
- Eq(SectionMetadata(/*id_in=*/3, TYPE_INT64, TOKENIZER_NONE,
- TERM_MATCH_UNKNOWN, NUMERIC_MATCH_RANGE,
- /*path_in=*/"emails.timestamp")));
+ EXPECT_THAT(
+ section_group.integer_sections[1].metadata,
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()));
EXPECT_THAT(section_group.integer_sections[1].content,
ElementsAre(kDefaultTimestamp, kDefaultTimestamp));
}
+TEST_F(SectionManagerTest, GetSectionMetadata) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (section id -> section property path):
+ // 0 -> recipientIds
+ // 1 -> recipients
+ // 2 -> subject
+ // 3 -> timestamp
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()))));
+
+ // Conversation (section id -> section property path):
+ // 0 -> emails.recipientIds
+ // 1 -> emails.recipients
+ // 2 -> emails.subject
+ // 3 -> emails.timestamp
+ // 4 -> name
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/2),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/3),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/3, /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/4),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/4, /*expected_property_path=*/"name",
+ CreateNamePropertyConfig()))));
+}
+
+TEST_F(SectionManagerTest, GetSectionMetadataInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(2));
+
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/-1, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerTest, GetSectionMetadataInvalidSectionId) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (section id -> section property path):
+ // 0 -> recipientIds
+ // 1 -> recipients
+ // 2 -> subject
+ // 3 -> timestamp
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/4),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Conversation (section id -> section property path):
+ // 0 -> emails.recipientIds
+ // 1 -> emails.recipients
+ // 2 -> emails.subject
+ // 3 -> emails.timestamp
+ // 4 -> name
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/5),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST_F(SectionManagerTest,
NonStringFieldsWithStringIndexingConfigDontCreateSections) {
// Create a schema for an empty document.
@@ -557,9 +481,10 @@ TEST_F(SectionManagerTest,
ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
/*schema_type_id=*/1));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Create an empty document to be nested
DocumentProto empty_document = DocumentBuilder()
@@ -580,8 +505,9 @@ TEST_F(SectionManagerTest,
.Build();
// Extracts sections from 'Schema' document
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(document));
EXPECT_THAT(section_group.string_sections, IsEmpty());
EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
@@ -656,9 +582,10 @@ TEST_F(SectionManagerTest,
ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
/*schema_type_id=*/1));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Create an empty document to be nested
DocumentProto empty_document = DocumentBuilder()
@@ -679,8 +606,9 @@ TEST_F(SectionManagerTest,
.Build();
// Extracts sections from 'Schema' document
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(document));
EXPECT_THAT(section_group.string_sections, IsEmpty());
EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
@@ -756,14 +684,16 @@ TEST_F(SectionManagerTest, AssignSectionsRecursivelyForDocumentFields) {
ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
document_type_schema_type_id));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Extracts sections from 'Schema' document; there should be the 1 string
// property and 1 integer property inside the document.
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(outer_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(outer_document));
EXPECT_THAT(section_group.string_sections, SizeIs(1));
EXPECT_THAT(section_group.integer_sections, SizeIs(1));
}
@@ -839,14 +769,16 @@ TEST_F(SectionManagerTest, DontAssignSectionsRecursivelyForDocumentFields) {
ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
document_type_schema_type_id));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map, schema_type_mapper.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
// Extracts sections from 'Schema' document; there won't be any since we
// didn't recurse into the document to see the inner string property
- ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
- section_manager->ExtractSections(outer_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(outer_document));
EXPECT_THAT(section_group.string_sections, IsEmpty());
EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
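A minimal sketch of the creation pattern the updated tests rely on: SectionManager is no longer created directly; instead a SchemaTypeManager is built from the type config map and schema type mapper, and its section_manager() accessor is used for extraction. The snippet below only mirrors the test code above and assumes type_config_map and schema_type_mapper are already populated as in SectionManagerTest::SetUp().

    // Sketch only -- not part of the patch. Names follow the tests above.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<SchemaTypeManager> schema_type_manager,
        SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
    ICING_ASSERT_OK_AND_ASSIGN(
        SectionGroup section_group,
        schema_type_manager->section_manager().ExtractSections(email_document));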
diff --git a/icing/schema/section.h b/icing/schema/section.h
index 241095b..65149b9 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -42,6 +42,10 @@ using SectionIdMask = int64_t;
inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
+static_assert(kSectionIdBits < 8 * sizeof(SectionId),
+ "Cannot exhaust all bits of SectionId since it is a signed "
+ "integer and the most significant bit should be preserved.");
+
static_assert(
kMaxSectionId < 8 * sizeof(SectionIdMask),
"SectionIdMask is not large enough to represent all section values!");
@@ -132,6 +136,9 @@ struct Section {
// Groups of different type sections. Callers can access sections with types
// they want and avoid going through non-desired ones.
+//
+// REQUIRES: the lifetime of the source property (document) must exceed that of
+// this object, since we use std::string_view to extract its string_values.
struct SectionGroup {
std::vector<Section<std::string_view>> string_sections;
std::vector<Section<int64_t>> integer_sections;
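The new REQUIRES note has a practical consequence: string sections hold std::string_view into the source document, so the DocumentProto passed to ExtractSections must outlive the SectionGroup. A hedged sketch, with hypothetical helper names:

    // Sketch only -- 'BuildEmailDocument' and 'ProcessContent' are hypothetical.
    DocumentProto document = BuildEmailDocument();
    ICING_ASSERT_OK_AND_ASSIGN(
        SectionGroup section_group,
        schema_type_manager->section_manager().ExtractSections(document));
    for (const Section<std::string_view>& section : section_group.string_sections) {
      // Valid only while 'document' is alive; the views point into its fields.
      ProcessContent(section.content);
    }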
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
index b0b32e9..ebefc4e 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
@@ -24,6 +24,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
@@ -262,6 +263,27 @@ TEST_F(AdvancedScorerTest, BasicMathFunctionExpression) {
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sum(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10 + 11 + 12 + 13 + 14));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("avg(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq((10 + 11 + 12 + 13 + 14) / 5.));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(2)"),
/*default_score=*/10, document_store_.get(),
schema_store_.get()));
@@ -428,6 +450,119 @@ TEST_F(AdvancedScorerTest, RelevanceScoreFunctionScoreExpression) {
EXPECT_THAT(scorer->GetScore(docHitInfo, /*query_it=*/nullptr), Eq(10));
}
+TEST_F(AdvancedScorerTest, ChildrenScoresFunctionScoreExpression) {
+ const double default_score = 123;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_1,
+ document_store_->Put(CreateDocument("namespace", "uri1")));
+ DocHitInfo docHitInfo1 = DocHitInfo(document_id_1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_2,
+ document_store_->Put(CreateDocument("namespace", "uri2")));
+ DocHitInfo docHitInfo2 = DocHitInfo(document_id_2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_3,
+ document_store_->Put(CreateDocument("namespace", "uri3")));
+ DocHitInfo docHitInfo3 = DocHitInfo(document_id_3);
+
+ // Create a JoinChildrenFetcher that matches:
+ // document_id_1 to fake_child1 with score 1 and fake_child2 with score 2.
+ // document_id_2 to fake_child3 with score 4.
+ // document_id_3 has no child.
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression("this.qualifiedId()");
+ join_spec.set_child_property_expression("sender");
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id;
+ ScoredDocumentHit fake_child1(/*document_id=*/10, kSectionIdMaskNone,
+ /*score=*/1.0);
+ ScoredDocumentHit fake_child2(/*document_id=*/11, kSectionIdMaskNone,
+ /*score=*/2.0);
+ ScoredDocumentHit fake_child3(/*document_id=*/12, kSectionIdMaskNone,
+ /*score=*/4.0);
+ map_joinable_qualified_id[document_id_1].push_back(fake_child1);
+ map_joinable_qualified_id[document_id_1].push_back(fake_child2);
+ map_joinable_qualified_id[document_id_2].push_back(fake_child3);
+ JoinChildrenFetcher fetcher(join_spec, std::move(map_joinable_qualified_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(this.childrenScores())"),
+ default_score, document_store_.get(), schema_store_.get(), &fetcher));
+ // document_id_1 has two children.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(2));
+ // document_id_2 has one child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(1));
+ // document_id_3 has no child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sum(this.childrenScores())"),
+ default_score, document_store_.get(), schema_store_.get(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4));
+ // document_id_3 has no child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("avg(this.childrenScores())"),
+ default_score, document_store_.get(), schema_store_.get(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3 / 2.));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4 / 1.));
+ // document_id_3 has no child.
+ // This is an evaluation error, so default_score will be returned.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr),
+ Eq(default_score));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec(
+ // Equivalent to "avg(this.childrenScores())"
+ "sum(this.childrenScores()) / len(this.childrenScores())"),
+ default_score, document_store_.get(), schema_store_.get(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3 / 2.));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4 / 1.));
+ // document_id_3 has no child.
+ // This is an evaluation error, so default_score will be returned.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr),
+ Eq(default_score));
+}
+
+TEST_F(AdvancedScorerTest, InvalidChildrenScoresFunctionScoreExpression) {
+ const double default_score = 123;
+
+ // Without a join_children_fetcher provided, "len(this.childrenScores())" cannot
+ // be created.
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(this.childrenScores())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ /*join_children_fetcher=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // The root expression can only be of double type, but here it is of list
+ // type.
+ JoinChildrenFetcher fake_fetcher(JoinSpecProto::default_instance(),
+ /*map_joinable_qualified_id=*/{});
+ EXPECT_THAT(
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("this.childrenScores()"),
+ default_score, document_store_.get(),
+ schema_store_.get(), &fake_fetcher),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST_F(AdvancedScorerTest, ComplexExpression) {
const int64_t creation_timestamp_ms = 123;
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc
index 6393645..4df0710 100644
--- a/icing/scoring/advanced_scoring/score-expression.cc
+++ b/icing/scoring/advanced_scoring/score-expression.cc
@@ -14,6 +14,9 @@
#include "icing/scoring/advanced_scoring/score-expression.h"
+#include <numeric>
+#include <vector>
+
namespace icing {
namespace lib {
@@ -104,97 +107,104 @@ libtextclassifier3::StatusOr<double> OperatorScoreExpression::eval(
const std::unordered_map<std::string, MathFunctionScoreExpression::FunctionType>
MathFunctionScoreExpression::kFunctionNames = {
- {"log", FunctionType::kLog}, {"pow", FunctionType::kPow},
- {"max", FunctionType::kMax}, {"min", FunctionType::kMin},
- {"sqrt", FunctionType::kSqrt}, {"abs", FunctionType::kAbs},
- {"sin", FunctionType::kSin}, {"cos", FunctionType::kCos},
- {"tan", FunctionType::kTan}};
+ {"log", FunctionType::kLog}, {"pow", FunctionType::kPow},
+ {"max", FunctionType::kMax}, {"min", FunctionType::kMin},
+ {"len", FunctionType::kLen}, {"sum", FunctionType::kSum},
+ {"avg", FunctionType::kAvg}, {"sqrt", FunctionType::kSqrt},
+ {"abs", FunctionType::kAbs}, {"sin", FunctionType::kSin},
+ {"cos", FunctionType::kCos}, {"tan", FunctionType::kTan}};
const std::unordered_set<MathFunctionScoreExpression::FunctionType>
MathFunctionScoreExpression::kVariableArgumentsFunctions = {
- FunctionType::kMax, FunctionType::kMin};
+ FunctionType::kMax, FunctionType::kMin, FunctionType::kLen,
+ FunctionType::kSum, FunctionType::kAvg};
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
MathFunctionScoreExpression::Create(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children) {
- if (children.empty()) {
+ std::vector<std::unique_ptr<ScoreExpression>> args) {
+ if (args.empty()) {
return absl_ports::InvalidArgumentError(
"Math functions must have at least one argument.");
}
- ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
// Received a list type in the function argument.
- if (children.size() == 1 &&
- children[0]->type() == ScoreExpressionType::kDoubleList) {
+ if (args.size() == 1 && args[0]->type() == ScoreExpressionType::kDoubleList) {
// Only certain functions support list type.
if (kVariableArgumentsFunctions.count(function_type) > 0) {
return std::unique_ptr<MathFunctionScoreExpression>(
- new MathFunctionScoreExpression(function_type, std::move(children)));
+ new MathFunctionScoreExpression(function_type, std::move(args)));
}
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Received an unsupported list type argument in the math function."));
}
- bool children_all_constant_double = true;
- for (const auto& child : children) {
+ bool args_all_constant_double = true;
+ for (const auto& child : args) {
if (child->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
"Got an invalid type for the math function. Should expect a double "
"type argument.");
}
if (!child->is_constant_double()) {
- children_all_constant_double = false;
+ args_all_constant_double = false;
}
}
switch (function_type) {
case FunctionType::kLog:
- if (children.size() != 1 && children.size() != 2) {
+ if (args.size() != 1 && args.size() != 2) {
return absl_ports::InvalidArgumentError(
"log must have 1 or 2 arguments.");
}
break;
case FunctionType::kPow:
- if (children.size() != 2) {
+ if (args.size() != 2) {
return absl_ports::InvalidArgumentError("pow must have 2 arguments.");
}
break;
case FunctionType::kSqrt:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("sqrt must have 1 argument.");
}
break;
case FunctionType::kAbs:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("abs must have 1 argument.");
}
break;
case FunctionType::kSin:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("sin must have 1 argument.");
}
break;
case FunctionType::kCos:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("cos must have 1 argument.");
}
break;
case FunctionType::kTan:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError("tan must have 1 argument.");
}
break;
- // max and min support variable length arguments
+ // Functions that support variable length arguments
case FunctionType::kMax:
[[fallthrough]];
case FunctionType::kMin:
+ [[fallthrough]];
+ case FunctionType::kLen:
+ [[fallthrough]];
+ case FunctionType::kSum:
+ [[fallthrough]];
+ case FunctionType::kAvg:
break;
}
std::unique_ptr<ScoreExpression> expression =
std::unique_ptr<MathFunctionScoreExpression>(
- new MathFunctionScoreExpression(function_type, std::move(children)));
- if (children_all_constant_double) {
- // Because all of the children are constants, this expression does not
+ new MathFunctionScoreExpression(function_type, std::move(args)));
+ if (args_all_constant_double) {
+ // Because all of the arguments are constants, this expression does not
// depend on the DocHitInfo or query_it that are passed into it.
return ConstantScoreExpression::Create(
expression->eval(DocHitInfo(), /*query_it=*/nullptr));
@@ -205,11 +215,10 @@ MathFunctionScoreExpression::Create(
libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
std::vector<double> values;
- if (children_.at(0)->type() == ScoreExpressionType::kDoubleList) {
- ICING_ASSIGN_OR_RETURN(values,
- children_.at(0)->eval_list(hit_info, query_it));
+ if (args_.at(0)->type() == ScoreExpressionType::kDoubleList) {
+ ICING_ASSIGN_OR_RETURN(values, args_.at(0)->eval_list(hit_info, query_it));
} else {
- for (const auto& child : children_) {
+ for (const auto& child : args_) {
ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it));
values.push_back(v);
}
@@ -243,6 +252,19 @@ libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
}
res = *std::min_element(values.begin(), values.end());
break;
+ case FunctionType::kLen:
+ res = values.size();
+ break;
+ case FunctionType::kSum:
+ res = std::reduce(values.begin(), values.end());
+ break;
+ case FunctionType::kAvg:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in avg function.");
+ }
+ res = std::reduce(values.begin(), values.end()) / values.size();
+ break;
case FunctionType::kSqrt:
res = sqrt(values[0]);
break;
@@ -278,15 +300,15 @@ const std::unordered_map<std::string,
libtextclassifier3::StatusOr<std::unique_ptr<DocumentFunctionScoreExpression>>
DocumentFunctionScoreExpression::Create(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
const DocumentStore* document_store, double default_score) {
- if (children.empty()) {
+ if (args.empty()) {
return absl_ports::InvalidArgumentError(
"Document-based functions must have at least one argument.");
}
- ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
- if (children[0]->type() != ScoreExpressionType::kDocument) {
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
return absl_ports::InvalidArgumentError(
"The first parameter of document-based functions must be \"this\".");
}
@@ -294,7 +316,7 @@ DocumentFunctionScoreExpression::Create(
case FunctionType::kDocumentScore:
[[fallthrough]];
case FunctionType::kCreationTimestamp:
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError(
"DocumentScore/CreationTimestamp must have 1 argument.");
}
@@ -302,8 +324,7 @@ DocumentFunctionScoreExpression::Create(
case FunctionType::kUsageCount:
[[fallthrough]];
case FunctionType::kUsageLastUsedTimestamp:
- if (children.size() != 2 ||
- children[1]->type() != ScoreExpressionType::kDouble) {
+ if (args.size() != 2 || args[1]->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
"UsageCount/UsageLastUsedTimestamp must have 2 arguments. The "
"first argument should be \"this\", and the second argument "
@@ -312,7 +333,7 @@ DocumentFunctionScoreExpression::Create(
break;
}
return std::unique_ptr<DocumentFunctionScoreExpression>(
- new DocumentFunctionScoreExpression(function_type, std::move(children),
+ new DocumentFunctionScoreExpression(function_type, std::move(args),
document_store, default_score));
}
@@ -335,7 +356,7 @@ libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
[[fallthrough]];
case FunctionType::kUsageLastUsedTimestamp: {
ICING_ASSIGN_OR_RETURN(double raw_usage_type,
- children_[1]->eval(hit_info, query_it));
+ args_[1]->eval(hit_info, query_it));
int usage_type = (int)raw_usage_type;
if (usage_type < 1 || usage_type > 3 || raw_usage_type != usage_type) {
return absl_ports::InvalidArgumentError(
@@ -368,21 +389,21 @@ libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
libtextclassifier3::StatusOr<
std::unique_ptr<RelevanceScoreFunctionScoreExpression>>
RelevanceScoreFunctionScoreExpression::Create(
- std::vector<std::unique_ptr<ScoreExpression>> children,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
Bm25fCalculator* bm25f_calculator, double default_score) {
- if (children.size() != 1) {
+ if (args.size() != 1) {
return absl_ports::InvalidArgumentError(
"relevanceScore must have 1 argument.");
}
- ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
- if (children[0]->type() != ScoreExpressionType::kDocument) {
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
return absl_ports::InvalidArgumentError(
"relevanceScore must take \"this\" as its argument.");
}
return std::unique_ptr<RelevanceScoreFunctionScoreExpression>(
- new RelevanceScoreFunctionScoreExpression(
- std::move(children), bm25f_calculator, default_score));
+ new RelevanceScoreFunctionScoreExpression(bm25f_calculator,
+ default_score));
}
libtextclassifier3::StatusOr<double>
@@ -395,5 +416,43 @@ RelevanceScoreFunctionScoreExpression::eval(
bm25f_calculator_.ComputeScore(query_it, hit_info, default_score_));
}
+libtextclassifier3::StatusOr<
+ std::unique_ptr<ChildrenScoresFunctionScoreExpression>>
+ChildrenScoresFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const JoinChildrenFetcher* join_children_fetcher) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "childrenScores must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "childrenScores must take \"this\" as its argument.");
+ }
+ if (join_children_fetcher == nullptr) {
+ return absl_ports::InvalidArgumentError(
+ "childrenScores must only be used with join, but JoinChildrenFetcher "
+ "is not provided.");
+ }
+ return std::unique_ptr<ChildrenScoresFunctionScoreExpression>(
+ new ChildrenScoresFunctionScoreExpression(*join_children_fetcher));
+}
+
+libtextclassifier3::StatusOr<std::vector<double>>
+ChildrenScoresFunctionScoreExpression::eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<ScoredDocumentHit> children_hits,
+ join_children_fetcher_.GetChildren(hit_info.document_id()));
+ std::vector<double> children_scores;
+ children_scores.reserve(children_hits.size());
+ for (const ScoredDocumentHit& child_hit : children_hits) {
+ children_scores.push_back(child_hit.score());
+ }
+ return std::move(children_scores);
+}
+
} // namespace lib
} // namespace icing
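One detail worth noting in the kSum/kAvg cases above: std::reduce over an empty range yields a value-initialized double (0.0), which is why sum of an empty list evaluates to 0 while avg of an empty list must be rejected explicitly. A self-contained sketch of the same semantics (not part of the patch):

    #include <numeric>
    #include <vector>

    double Sum(const std::vector<double>& values) {
      // std::reduce with no init value returns a value-initialized double
      // (0.0) for an empty range, so sum({}) == 0 without special casing.
      return std::reduce(values.begin(), values.end());
    }

    double Avg(const std::vector<double>& values) {
      // avg is undefined for an empty list; MathFunctionScoreExpression guards
      // this case and returns INVALID_ARGUMENT before dividing.
      return std::reduce(values.begin(), values.end()) / values.size();
    }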
diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h
index 047a175..b70cd46 100644
--- a/icing/scoring/advanced_scoring/score-expression.h
+++ b/icing/scoring/advanced_scoring/score-expression.h
@@ -25,6 +25,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/scoring/bm25f-calculator.h"
#include "icing/store/document-store.h"
#include "icing/util/status-macros.h"
@@ -158,6 +159,9 @@ class MathFunctionScoreExpression : public ScoreExpression {
kPow,
kMax,
kMin,
+ kLen,
+ kSum,
+ kAvg,
kSqrt,
kAbs,
kSin,
@@ -172,11 +176,11 @@ class MathFunctionScoreExpression : public ScoreExpression {
// RETURNS:
// - A MathFunctionScoreExpression instance on success if not simplifiable.
// - A ConstantScoreExpression instance on success if simplifiable.
- // - FAILED_PRECONDITION on any null pointer in children.
+ // - FAILED_PRECONDITION on any null pointer in args.
// - INVALID_ARGUMENT on type errors.
static libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> Create(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children);
+ std::vector<std::unique_ptr<ScoreExpression>> args);
libtextclassifier3::StatusOr<double> eval(
const DocHitInfo& hit_info,
@@ -189,11 +193,11 @@ class MathFunctionScoreExpression : public ScoreExpression {
private:
explicit MathFunctionScoreExpression(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children)
- : function_type_(function_type), children_(std::move(children)) {}
+ std::vector<std::unique_ptr<ScoreExpression>> args)
+ : function_type_(function_type), args_(std::move(args)) {}
FunctionType function_type_;
- std::vector<std::unique_ptr<ScoreExpression>> children_;
+ std::vector<std::unique_ptr<ScoreExpression>> args_;
};
class DocumentFunctionScoreExpression : public ScoreExpression {
@@ -209,12 +213,12 @@ class DocumentFunctionScoreExpression : public ScoreExpression {
// RETURNS:
// - A DocumentFunctionScoreExpression instance on success.
- // - FAILED_PRECONDITION on any null pointer in children.
+ // - FAILED_PRECONDITION on any null pointer in args.
// - INVALID_ARGUMENT on type errors.
static libtextclassifier3::StatusOr<
std::unique_ptr<DocumentFunctionScoreExpression>>
Create(FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
const DocumentStore* document_store, double default_score);
libtextclassifier3::StatusOr<double> eval(
@@ -228,14 +232,14 @@ class DocumentFunctionScoreExpression : public ScoreExpression {
private:
explicit DocumentFunctionScoreExpression(
FunctionType function_type,
- std::vector<std::unique_ptr<ScoreExpression>> children,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
const DocumentStore* document_store, double default_score)
- : children_(std::move(children)),
+ : args_(std::move(args)),
document_store_(*document_store),
default_score_(default_score),
function_type_(function_type) {}
- std::vector<std::unique_ptr<ScoreExpression>> children_;
+ std::vector<std::unique_ptr<ScoreExpression>> args_;
const DocumentStore& document_store_;
double default_score_;
FunctionType function_type_;
@@ -247,11 +251,11 @@ class RelevanceScoreFunctionScoreExpression : public ScoreExpression {
// RETURNS:
// - A RelevanceScoreFunctionScoreExpression instance on success.
- // - FAILED_PRECONDITION on any null pointer in children.
+ // - FAILED_PRECONDITION on any null pointer in args.
// - INVALID_ARGUMENT on type errors.
static libtextclassifier3::StatusOr<
std::unique_ptr<RelevanceScoreFunctionScoreExpression>>
- Create(std::vector<std::unique_ptr<ScoreExpression>> children,
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
Bm25fCalculator* bm25f_calculator, double default_score);
libtextclassifier3::StatusOr<double> eval(
@@ -264,17 +268,41 @@ class RelevanceScoreFunctionScoreExpression : public ScoreExpression {
private:
explicit RelevanceScoreFunctionScoreExpression(
- std::vector<std::unique_ptr<ScoreExpression>> children,
Bm25fCalculator* bm25f_calculator, double default_score)
- : children_(std::move(children)),
- bm25f_calculator_(*bm25f_calculator),
- default_score_(default_score) {}
+ : bm25f_calculator_(*bm25f_calculator), default_score_(default_score) {}
- std::vector<std::unique_ptr<ScoreExpression>> children_;
Bm25fCalculator& bm25f_calculator_;
double default_score_;
};
+class ChildrenScoresFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "childrenScores";
+
+ // RETURNS:
+ // - A ChildrenScoresFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in args.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<ChildrenScoresFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ const JoinChildrenFetcher* join_children_fetcher);
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ private:
+ explicit ChildrenScoresFunctionScoreExpression(
+ const JoinChildrenFetcher& join_children_fetcher)
+ : join_children_fetcher_(join_children_fetcher) {}
+ const JoinChildrenFetcher& join_children_fetcher_;
+};
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/advanced_scoring/score-expression_test.cc b/icing/scoring/advanced_scoring/score-expression_test.cc
index 07c9d76..588090d 100644
--- a/icing/scoring/advanced_scoring/score-expression_test.cc
+++ b/icing/scoring/advanced_scoring/score-expression_test.cc
@@ -222,6 +222,27 @@ TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgument) {
MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+ // len({1, 2, 3}) = 3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3)));
+
+ // sum({1, 2, 3}) = 6
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(6)));
+
+ // avg({1, 2, 3}) = 2
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(2)));
+
// max({4}) = 4
ICING_ASSERT_OK_AND_ASSIGN(
expression, MathFunctionScoreExpression::Create(
@@ -235,6 +256,41 @@ TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgument) {
MathFunctionScoreExpression::FunctionType::kMin,
MakeChildren(ListScoreExpression::Create({5}))));
EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(5)));
+
+ // len({6}) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({6}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+
+ // sum({7}) = 7
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({7}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(7)));
+
+ // avg({7}) = 7
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({7}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(7)));
+
+ // len({}) = 0
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0)));
+
+ // sum({}) = 0
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0)));
}
TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgumentError) {
@@ -248,6 +304,15 @@ TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgumentError) {
EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // avg({}) = evaluation error, since avg on empty list does not produce a
+ // valid result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
// max(1, {2}) = type error, since max must take either n > 0 parameters of
// type double, or a single parameter of type list.
EXPECT_THAT(MathFunctionScoreExpression::Create(
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.cc b/icing/scoring/advanced_scoring/scoring-visitor.cc
index ea2e190..059e65b 100644
--- a/icing/scoring/advanced_scoring/scoring-visitor.cc
+++ b/icing/scoring/advanced_scoring/scoring-visitor.cc
@@ -77,16 +77,16 @@ void ScoringVisitor::VisitMember(const MemberNode* node) {
void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node,
bool is_member_function) {
- std::vector<std::unique_ptr<ScoreExpression>> children;
+ std::vector<std::unique_ptr<ScoreExpression>> args;
if (is_member_function) {
- children.push_back(ThisExpression::Create());
+ args.push_back(ThisExpression::Create());
}
for (const auto& arg : node->args()) {
arg->Accept(this);
if (has_pending_error()) {
return;
}
- children.push_back(pop_stack());
+ args.push_back(pop_stack());
}
const std::string& function_name = node->function_name()->value();
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> expression =
@@ -98,18 +98,23 @@ void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node,
// Document-based function
expression = DocumentFunctionScoreExpression::Create(
DocumentFunctionScoreExpression::kFunctionNames.at(function_name),
- std::move(children), &document_store_, default_score_);
+ std::move(args), &document_store_, default_score_);
} else if (function_name ==
RelevanceScoreFunctionScoreExpression::kFunctionName) {
// relevanceScore function
expression = RelevanceScoreFunctionScoreExpression::Create(
- std::move(children), &bm25f_calculator_, default_score_);
+ std::move(args), &bm25f_calculator_, default_score_);
+ } else if (function_name ==
+ ChildrenScoresFunctionScoreExpression::kFunctionName) {
+ // childrenScores function
+ expression = ChildrenScoresFunctionScoreExpression::Create(
+ std::move(args), join_children_fetcher_);
} else if (MathFunctionScoreExpression::kFunctionNames.find(function_name) !=
MathFunctionScoreExpression::kFunctionNames.end()) {
// Math functions
expression = MathFunctionScoreExpression::Create(
MathFunctionScoreExpression::kFunctionNames.at(function_name),
- std::move(children));
+ std::move(args));
}
if (!expression.ok()) {
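With this dispatch in place, the new variable-argument math functions and childrenScores can be combined in a single advanced scoring expression, as long as a JoinChildrenFetcher is supplied at scorer creation. A hedged sketch that reuses the helper and fixtures from the advanced-scorer test above:

    // Sketch only -- assumes document_store_, schema_store_, and 'fetcher' are
    // set up as in AdvancedScorerTest::ChildrenScoresFunctionScoreExpression.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<AdvancedScorer> scorer,
        AdvancedScorer::Create(
            CreateAdvancedScoringSpec("sum(this.childrenScores()) + 1"),
            /*default_score=*/0, document_store_.get(), schema_store_.get(),
            &fetcher));
    double score = scorer->GetScore(docHitInfo1, /*query_it=*/nullptr);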
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index bf12f96..9a126dc 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -97,13 +97,13 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -120,7 +120,6 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(scoring_spec, document_store.get(),
schema_store.get()));
-
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -160,7 +159,6 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
PopTopResultsFromHeap(&scored_document_hits, /*num_results=*/20,
scored_document_hit_comparator);
}
-
// Clean up
document_store.reset();
schema_store.reset();
@@ -200,13 +198,13 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -304,13 +302,13 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -402,13 +400,13 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir, &clock));
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/store/suggestion-result-checker-impl.h b/icing/store/suggestion-result-checker-impl.h
index 89e7214..f6789e8 100644
--- a/icing/store/suggestion-result-checker-impl.h
+++ b/icing/store/suggestion-result-checker-impl.h
@@ -15,6 +15,7 @@
#ifndef ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
#define ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
@@ -26,17 +27,76 @@ namespace lib {
class SuggestionResultCheckerImpl : public SuggestionResultChecker {
public:
explicit SuggestionResultCheckerImpl(
- const DocumentStore* document_store,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
std::unordered_set<NamespaceId> target_namespace_ids,
std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
document_id_filter_map,
std::unordered_set<SchemaTypeId> target_schema_type_ids,
- std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map)
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map,
+ std::string target_section, std::unordered_set<DocumentId> search_base)
: document_store_(*document_store),
+ schema_store_(*schema_store),
target_namespace_ids_(std::move(target_namespace_ids)),
document_id_filter_map_(std::move(document_id_filter_map)),
target_schema_type_ids_(std::move(target_schema_type_ids)),
- property_filter_map_(std::move(property_filter_map)) {}
+ property_filter_map_(std::move(property_filter_map)),
+ target_section_(std::move(target_section)),
+ search_base_(std::move(search_base)) {}
+
+ bool MatchesTargetNamespace(NamespaceId namespace_id) const {
+ return target_namespace_ids_.empty() ||
+ target_namespace_ids_.find(namespace_id) !=
+ target_namespace_ids_.end();
+ }
+
+ bool MatchesTargetDocumentIds(NamespaceId namespace_id,
+ DocumentId document_id) const {
+ if (document_id_filter_map_.empty()) {
+ return true;
+ }
+ auto document_ids_itr = document_id_filter_map_.find(namespace_id);
+ // Return true if the client didn't set a document id filter for this
+ // namespace, or if this document is one of the desired documents.
+ return document_ids_itr == document_id_filter_map_.end() ||
+ document_ids_itr->second.find(document_id) !=
+ document_ids_itr->second.end();
+ }
+
+ bool MatchesTargetSchemaType(SchemaTypeId schema_type_id) const {
+ return target_schema_type_ids_.empty() ||
+ target_schema_type_ids_.find(schema_type_id) !=
+ target_schema_type_ids_.end();
+ }
+
+ bool MatchesTargetSection(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ if (target_section_.empty()) {
+ return true;
+ }
+ auto section_metadata_or =
+ schema_store_.GetSectionMetadata(schema_type_id, section_id);
+ if (!section_metadata_or.ok()) {
+ // Cannot find the target section metadata.
+ return false;
+ }
+ const SectionMetadata* section_metadata = section_metadata_or.ValueOrDie();
+ return section_metadata->path == target_section_;
+ }
+
+ bool MatchesSearchBase(DocumentId document_id) const {
+ return search_base_.empty() ||
+ search_base_.find(document_id) != search_base_.end();
+ }
+
+ bool MatchesPropertyFilter(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ if (property_filter_map_.empty()) {
+ return true;
+ }
+ auto section_mask_itr = property_filter_map_.find(schema_type_id);
+ return section_mask_itr == property_filter_map_.end() ||
+ (section_mask_itr->second & (UINT64_C(1) << section_id)) != 0;
+ }
bool BelongsToTargetResults(DocumentId document_id,
SectionId section_id) const override {
@@ -50,54 +110,38 @@ class SuggestionResultCheckerImpl : public SuggestionResultChecker {
DocumentFilterData document_filter_data =
document_filter_data_optional_.value();
- // 1: Check the namespace filter
- if (!target_namespace_ids_.empty() &&
- target_namespace_ids_.find(document_filter_data.namespace_id()) ==
- target_namespace_ids_.end()) {
- // User gives a namespace filter, and the current namespace isn't desired.
+ if (!MatchesTargetNamespace(document_filter_data.namespace_id())) {
return false;
}
-
- // 2: Check the document id filter
- if (!document_id_filter_map_.empty()) {
- auto document_ids_itr =
- document_id_filter_map_.find(document_filter_data.namespace_id());
- if (document_ids_itr != document_id_filter_map_.end() &&
- document_ids_itr->second.find(document_id) ==
- document_ids_itr->second.end()) {
- // The client doesn't set desired document ids in this namespace, or the
- // client doesn't want this document.
- return false;
- }
+ if (!MatchesTargetDocumentIds(document_filter_data.namespace_id(),
+ document_id)) {
+ return false;
}
-
- // 3: Check the schema type filter
- if (!target_schema_type_ids_.empty() &&
- target_schema_type_ids_.find(document_filter_data.schema_type_id()) ==
- target_schema_type_ids_.end()) {
- // User gives a schema type filter, and the current schema type isn't
- // desired.
+ if (!MatchesTargetSchemaType(document_filter_data.schema_type_id())) {
return false;
}
-
- if (!property_filter_map_.empty()) {
- auto section_mask_itr =
- property_filter_map_.find(document_filter_data.schema_type_id());
- if (section_mask_itr != property_filter_map_.end() &&
- (section_mask_itr->second & (UINT64_C(1) << section_id)) == 0) {
- // The client doesn't set desired properties in this schema, or the
- // client doesn't want this property.
- return false;
- }
+ if (!MatchesTargetSection(document_filter_data.schema_type_id(),
+ section_id)) {
+ return false;
+ }
+ if (!MatchesSearchBase(document_id)) {
+ return false;
+ }
+ if (!MatchesPropertyFilter(document_filter_data.schema_type_id(),
+ section_id)) {
+ return false;
}
return true;
}
const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
std::unordered_set<NamespaceId> target_namespace_ids_;
std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
document_id_filter_map_;
std::unordered_set<SchemaTypeId> target_schema_type_ids_;
std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map_;
+ std::string target_section_;
+ std::unordered_set<DocumentId> search_base_;
};
} // namespace lib
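
For illustration, the SectionIdMask test in MatchesPropertyFilter above boils down to a single bit check; a minimal sketch, with a made-up mask and section ids, and plain uint64_t standing in for SectionIdMask:

#include <cstdint>

// Hypothetical mask that allows sections 1 and 3 (bits 1 and 3 set).
constexpr uint64_t kSectionMask = (UINT64_C(1) << 1) | (UINT64_C(1) << 3);
// Section 3 passes the property filter, section 2 does not.
constexpr bool kAllowsSection3 = (kSectionMask & (UINT64_C(1) << 3)) != 0;  // true
constexpr bool kAllowsSection2 = (kSectionMask & (UINT64_C(1) << 2)) != 0;  // false
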
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index db7b7ef..bbc1a59 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -16,6 +16,7 @@
#define ICING_TESTING_COMMON_MATCHERS_H_
#include <algorithm>
+#include <cinttypes>
#include <cmath>
#include <string>
#include <vector>
@@ -32,6 +33,7 @@
#include "icing/portable/equals-proto.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/scoring/scored-document-hit.h"
@@ -378,6 +380,31 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
return false;
}
+MATCHER_P3(EqualsSectionMetadata, expected_id, expected_property_path,
+ expected_property_config_proto, "") {
+ const SectionMetadata& actual = arg;
+ return actual.id == expected_id && actual.path == expected_property_path &&
+ actual.data_type == expected_property_config_proto.data_type() &&
+ actual.tokenizer ==
+ expected_property_config_proto.string_indexing_config()
+ .tokenizer_type() &&
+ actual.term_match_type ==
+ expected_property_config_proto.string_indexing_config()
+ .term_match_type() &&
+ actual.numeric_match_type ==
+ expected_property_config_proto.integer_indexing_config()
+ .numeric_match_type();
+}
+
+MATCHER_P3(EqualsJoinablePropertyMetadata, expected_id, expected_property_path,
+ expected_property_config_proto, "") {
+ const JoinablePropertyMetadata& actual = arg;
+ return actual.id == expected_id && actual.path == expected_property_path &&
+ actual.data_type == expected_property_config_proto.data_type() &&
+ actual.value_type ==
+ expected_property_config_proto.joinable_config().value_type();
+}
+
std::string StatusCodeToString(libtextclassifier3::StatusCode code);
std::string ProtoStatusCodeToString(StatusProto::Code code);
diff --git a/icing/testing/numeric/number-generator.h b/icing/testing/numeric/number-generator.h
new file mode 100644
index 0000000..bb601b4
--- /dev/null
+++ b/icing/testing/numeric/number-generator.h
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
+
+#include <random>
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class NumberGenerator {
+ public:
+ virtual ~NumberGenerator() = default;
+
+ virtual T Generate() = 0;
+
+ protected:
+ explicit NumberGenerator(int seed) : engine_(seed) {}
+
+ std::default_random_engine engine_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
diff --git a/icing/testing/numeric/uniform-distribution-integer-generator.h b/icing/testing/numeric/uniform-distribution-integer-generator.h
new file mode 100644
index 0000000..00d8459
--- /dev/null
+++ b/icing/testing/numeric/uniform-distribution-integer-generator.h
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+
+#include "icing/testing/numeric/number-generator.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class UniformDistributionIntegerGenerator : public NumberGenerator<T> {
+ public:
+ explicit UniformDistributionIntegerGenerator(int seed, T range_lower,
+ T range_upper)
+ : NumberGenerator<T>(seed), distribution_(range_lower, range_upper) {}
+
+ T Generate() override { return distribution_(this->engine_); }
+
+ private:
+ std::uniform_int_distribution<T> distribution_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
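
A minimal usage sketch for the new generator (illustrative only; the seed and bounds are arbitrary):

#include <cstdint>

#include "icing/testing/numeric/uniform-distribution-integer-generator.h"

// Draws reproducible int64_t test values uniformly from [0, 1000].
icing::lib::UniformDistributionIntegerGenerator<int64_t> generator(
    /*seed=*/12345, /*range_lower=*/0, /*range_upper=*/1000);
int64_t value = generator.Generate();
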
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
index 8cd8d05..1dcbf9b 100644
--- a/icing/tokenization/raw-query-tokenizer.cc
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -26,8 +26,8 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_join.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer.h"
@@ -252,41 +252,45 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// like "+", "&", "@", "#" in indexing and query tokenizers.
constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
/*State: Ready*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
- PROCESSING_OR, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, READY, READY},
/*State: PROCESSING_ALPHANUMERIC_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID,
- PROCESSING_PROPERTY_RESTRICT, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
/*State: PROCESSING_EXCLUSION*/
{READY, PROCESSING_EXCLUSION_TERM, PROCESSING_EXCLUSION_TERM, INVALID,
CLOSING_PARENTHESES, PROCESSING_EXCLUSION, INVALID, INVALID, READY},
/*State: PROCESSING_EXCLUSION_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
/*State: PROCESSING_PROPERTY_RESTRICT*/
{READY, PROCESSING_PROPERTY_TERM, PROCESSING_PROPERTY_TERM, INVALID,
CLOSING_PARENTHESES, INVALID, INVALID, PROCESSING_PROPERTY_RESTRICT,
READY},
/*State: PROCESSING_PROPERTY_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID,
- PROCESSING_PROPERTY_TERM_APPENDING, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_TERM_APPENDING,
+ READY},
/*State: PROCESSING_OR*/
{READY, INVALID, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID,
INVALID, INVALID, READY},
/*State: OPENING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
- OPENING_PARENTHESES, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, OPENING_PARENTHESES, READY,
+ READY},
/*State: CLOSING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
- PROCESSING_OR, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY},
/*State: PROCESSING_NON_ASCII_ALPHANUMERIC_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM,
- OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
/*State: PROCESSING_PROPERTY_TERM_APPENDING*/
{READY, PROCESSING_PROPERTY_TERM_APPENDING,
PROCESSING_PROPERTY_TERM_APPENDING, OPENING_PARENTHESES,
@@ -504,7 +508,7 @@ libtextclassifier3::Status OutputToken(State new_state,
// Asserts extra rule 1: each property name in the property path is a
// valid term.
for (std::string_view property :
- absl_ports::StrSplit(current_term, kPropertySeparator)) {
+ property_util::SplitPropertyPathExpr(current_term)) {
if (!SchemaUtil::ValidatePropertyName(property).ok()) {
return absl_ports::InvalidArgumentError(
GetErrorMessage(ERROR_NON_ASCII_AS_PROPERTY_NAME));
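
The helper now used here is the same property-path splitter adopted in snippet-helpers.cc below; a hedged sketch of the expected behavior, where the exact output is an assumption based on how the call sites consume it:

#include <string_view>
#include <vector>

#include "icing/schema/property-util.h"

// Presumably yields {"sender", "name"} for a nested property path expression.
std::vector<std::string_view> parts =
    icing::lib::property_util::SplitPropertyPathExpr("sender.name");
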
diff --git a/icing/util/encode-util.cc b/icing/util/encode-util.cc
new file mode 100644
index 0000000..2642da7
--- /dev/null
+++ b/icing/util/encode-util.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/encode-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace encode_util {
+
+std::string EncodeIntToCString(uint64_t value) {
+ std::string encoded_str;
+ // Encode in base 128 and add 1 to each byte so that no byte is 0. At worst,
+ // this grows encoded_str from 8 bytes to 10 bytes.
+ do {
+ encoded_str.push_back((value & 0x7F) + 1);
+ value >>= 7;
+ } while (value);
+ return encoded_str;
+}
+
+uint64_t DecodeIntFromCString(std::string_view encoded_str) {
+ uint64_t value = 0;
+ for (int i = encoded_str.length() - 1; i >= 0; --i) {
+ value <<= 7;
+ char c = encoded_str[i] - 1;
+ value |= (c & 0x7F);
+ }
+ return value;
+}
+
+} // namespace encode_util
+
+} // namespace lib
+} // namespace icing
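
A worked example of the arithmetic above (the value 300 is chosen for illustration): encoding emits one byte per 7-bit group, low bits first, adding 1 so no byte is zero, and decoding reverses this by walking the bytes from last to first.

// EncodeIntToCString(300):
//   step 1: (300 & 0x7F) + 1 = 44 + 1 = 45  -> first byte; value becomes 300 >> 7 = 2
//   step 2: (2 & 0x7F) + 1   =  2 + 1 =  3  -> second byte; value becomes 0, stop
//   result: "\x2d\x03" (two bytes, neither of them 0)
//
// DecodeIntFromCString("\x2d\x03") walks the bytes back to front:
//   start with 0; take 3:  (0 << 7) | (3 - 1)  = 2
//                 take 45: (2 << 7) | (45 - 1) = 256 | 44 = 300
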
diff --git a/icing/util/encode-util.h b/icing/util/encode-util.h
new file mode 100644
index 0000000..5a31acb
--- /dev/null
+++ b/icing/util/encode-util.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_ENCODE_UTIL_H_
+#define ICING_UTIL_ENCODE_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace encode_util {
+
+// Converts an unsigned 64-bit integer to a C string that contains no 0-byte,
+// since C strings use the 0-byte as a terminator. This increases the size of
+// the encoded string from 8 bytes to 10 bytes at worst.
+//
+// Note that it is compatible with unsigned 32-bit integers, i.e. casting a
+// uint32_t to uint64_t with the same value and encoding it with this method
+// produces the same string.
+std::string EncodeIntToCString(uint64_t value);
+
+// Converts a C string (encoded from EncodeIntToCString()) to an unsigned 64-bit
+// integer.
+uint64_t DecodeIntFromCString(std::string_view encoded_str);
+
+} // namespace encode_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_ENCODE_UTIL_H_
diff --git a/icing/util/encode-util_test.cc b/icing/util/encode-util_test.cc
new file mode 100644
index 0000000..c6cb984
--- /dev/null
+++ b/icing/util/encode-util_test.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/encode-util.h"
+
+#include <cstdint>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace encode_util {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::SizeIs;
+
+TEST(EncodeUtilTest, IntCStringZeroConversion) {
+ uint64_t value = 0;
+ std::string encoded_str = EncodeIntToCString(value);
+
+ EXPECT_THAT(encoded_str, SizeIs(Gt(0)));
+ EXPECT_THAT(DecodeIntFromCString(encoded_str), Eq(value));
+}
+
+TEST(EncodeUtilTest, IntCStringConversionIsReversible) {
+ uint64_t value = 123456;
+ std::string encoded_str = EncodeIntToCString(value);
+ EXPECT_THAT(DecodeIntFromCString(encoded_str), Eq(value));
+}
+
+TEST(EncodeUtilTest, MultipleIntCStringConversionsAreReversible) {
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(25)), Eq(25));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(766)), Eq(766));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(2305)), Eq(2305));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(6922)), Eq(6922));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(62326)), Eq(62326));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(186985)), Eq(186985));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(560962)), Eq(560962));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1682893)), Eq(1682893));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(15146065)), Eq(15146065));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(136314613)),
+ Eq(136314613));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1226831545)),
+ Eq(1226831545));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(11041483933)),
+ Eq(11041483933));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(2683080596566)),
+ Eq(2683080596566));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(72443176107373)),
+ Eq(72443176107373));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1955965754899162)),
+ Eq(1955965754899162));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(52811075382277465)),
+ Eq(52811075382277465));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(4277697105964474945)),
+ Eq(4277697105964474945));
+}
+
+TEST(EncodeUtilTest, MultipleValidEncodedCStringIntConversionsAreReversible) {
+ // Only valid encoded C strings (no zero bytes, length between 1 and 10) are
+ // reversible.
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("foo")), Eq("foo"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("bar")), Eq("bar"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("baz")), Eq("baz"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Icing")), Eq("Icing"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Google")), Eq("Google"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Youtube")),
+ Eq("Youtube"));
+}
+
+} // namespace
+
+} // namespace encode_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/snippet-helpers.cc b/icing/util/snippet-helpers.cc
index 6d6277f..ca6f423 100644
--- a/icing/util/snippet-helpers.cc
+++ b/icing/util/snippet-helpers.cc
@@ -17,47 +17,13 @@
#include <algorithm>
#include <string_view>
-#include "icing/absl_ports/str_join.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/schema/section-manager.h"
+#include "icing/schema/property-util.h"
namespace icing {
namespace lib {
-namespace {
-
-// Returns the property index and the property name with the index removed.
-// Examples:
-// GetPropertyIndex("foo") will return ["foo", 0]
-// GetPropertyIndex("foo[5]") will return ["foo", 5]
-std::pair<std::string_view, int> GetPropertyIndex(std::string_view property) {
- size_t l_bracket = property.find(kLBracket);
- if (l_bracket == std::string_view::npos || l_bracket >= property.length()) {
- return {property, 0};
- }
- size_t r_bracket = property.find(kRBracket, l_bracket);
- if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
- return {property, 0};
- }
- std::string index_string =
- std::string(property.substr(l_bracket + 1, r_bracket - l_bracket - 1));
- return {property.substr(0, l_bracket), std::stoi(index_string)};
-}
-
-} // namespace
-
-const PropertyProto* GetProperty(const DocumentProto& document,
- std::string_view property_name) {
- const PropertyProto* property = nullptr;
- for (const PropertyProto& prop : document.properties()) {
- if (prop.name() == property_name) {
- property = &prop;
- }
- }
- return property;
-}
-
std::vector<std::string_view> GetWindows(
std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
std::vector<std::string_view> windows;
@@ -89,31 +55,36 @@ std::vector<std::string_view> GetSubMatches(
}
std::string_view GetString(const DocumentProto* document,
- std::string_view property_path) {
+ std::string_view property_path_expr) {
std::vector<std::string_view> properties =
- absl_ports::StrSplit(property_path, kPropertySeparator);
+ property_util::SplitPropertyPathExpr(property_path_expr);
for (int i = 0; i < properties.size(); ++i) {
- std::string_view property = properties.at(i);
- int property_index;
- std::tie(property, property_index) = GetPropertyIndex(property);
- const PropertyProto* prop = GetProperty(*document, property);
+ property_util::PropertyInfo property_info =
+ property_util::ParsePropertyNameExpr(properties.at(i));
+ if (property_info.index == property_util::kWildcardPropertyIndex) {
+ // Use index = 0 by default.
+ property_info.index = 0;
+ }
+
+ const PropertyProto* prop =
+ property_util::GetPropertyProto(*document, property_info.name);
if (prop == nullptr) {
// requested property doesn't exist in the document. Return empty string.
return "";
}
if (i == properties.size() - 1) {
// The last property. Get the string_value
- if (prop->string_values_size() - 1 < property_index) {
+ if (prop->string_values_size() - 1 < property_info.index) {
// The requested string doesn't exist. Return empty string.
return "";
}
- return prop->string_values(property_index);
- } else if (prop->document_values_size() - 1 < property_index) {
+ return prop->string_values(property_info.index);
+ } else if (prop->document_values_size() - 1 < property_info.index) {
// The requested subproperty doesn't exist. return an empty string.
return "";
} else {
// Go to the next subproperty.
- document = &prop->document_values(property_index);
+ document = &prop->document_values(property_info.index);
}
}
return "";
diff --git a/icing/util/snippet-helpers.h b/icing/util/snippet-helpers.h
index 73b2ce2..d7349ba 100644
--- a/icing/util/snippet-helpers.h
+++ b/icing/util/snippet-helpers.h
@@ -45,14 +45,14 @@ std::vector<std::string_view> GetSubMatches(
std::string_view content, const SnippetProto::EntryProto& snippet_proto);
// Retrieves the string value held in the document corresponding to the
-// property_path.
+// property_path_expr.
// Example:
// - GetString(doc, "foo") will retrieve the first string value in the
// property "foo" in document or an empty string if it doesn't exist.
// - GetString(doc, "foo[1].bar[2]") will retrieve the third string value in
// the subproperty "bar" of the second document value in the property "foo".
std::string_view GetString(const DocumentProto* document,
- std::string_view property_path);
+ std::string_view property_path_expr);
} // namespace lib
} // namespace icing
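
A small end-to-end sketch of the lookup described above; the test name is hypothetical, and the document is assembled with the same proto accessors the implementation reads:

TEST(SnippetHelpersExampleTest, GetStringWalksNestedProperties) {
  DocumentProto document;
  PropertyProto* foo = document.add_properties();
  foo->set_name("foo");
  DocumentProto* nested = foo->add_document_values();  // foo[0]
  PropertyProto* bar = nested->add_properties();
  bar->set_name("bar");
  bar->add_string_values("hello");  // bar[0]

  // Walks "foo" (index defaults to 0), then "bar", and returns "hello".
  EXPECT_EQ(GetString(&document, "foo.bar"), "hello");
}
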
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 6f168bd..feb2643 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -23,7 +23,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Stats of the top-level function IcingSearchEngine::Initialize().
-// Next tag: 12
+// Next tag: 13
message InitializeStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -99,6 +99,12 @@ message InitializeStatsProto {
// Number of consecutive initialization failures that immediately preceded
// this initialization.
optional int32 num_previous_init_failures = 11;
+
+ // Possible recovery causes for integer index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause integer_index_restoration_cause = 12;
}
// Stats of the top-level function IcingSearchEngine::Put().
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index afbacd1..2dc1352 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=507959875)
+set(synced_AOSP_CL_number=513153289)