From cb6ac3ede1d2ad050895b588417ea353c75953fe Mon Sep 17 00:00:00 2001 From: Jiayu Hu Date: Thu, 30 Nov 2023 14:10:57 -0800 Subject: Update Icing from upstream. Descriptions: ======================================================================== [Icing][version 3] Bump kVersion to 3 ======================================================================== Make lite index magic dependent on `IcingSearchEngineOptions::build_property_existence_metadata_hits` ======================================================================== Add a flag in IcingSearchEngineOptions to control whether to build property existence metadata hits ======================================================================== Support `hasProperty(property_path)` in the advanced query language ======================================================================== Add PropertyExistenceIndexingHandler to index property existence metadata hit ======================================================================== [JoinIndex Improvement][11/x] Add IcingSearchEngine initialization unit test for switching join index ======================================================================== [JoinIndex Improvement][10/x] Change/Add IcingSearchEngine unit tests ======================================================================== [JoinIndex Improvement][9/x] Integrate QualifiedIdJoinIndexImplV2 with IcingSearchEngine ======================================================================== [JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with JoinProcessor ======================================================================== [JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with QualifiedIdJoinIndexingHandler ======================================================================== [JoinIndex Improvement][7/x] Create QualifiedIdJoinIndex interface ======================================================================== [JoinIndex Improvement][6.1/x] Unit test 
(Optimize) ======================================================================== [JoinIndex Improvement][6.0/x] Unit test (General, Put, GetIterator) ======================================================================== [JoinIndex Improvement][5.3/x] Implement Optimize ======================================================================== Remove accents from Greek letters in normalizer ======================================================================== Make arm emulator tests build-only. ======================================================================== [JoinIndex Improvement][5.2/x] Implement GetIterator ======================================================================== [JoinIndex Improvement][5.1/x] Implement Put ======================================================================== [JoinIndex Improvement][5.0/x] Branch QualifiedIdJoinIndex to QualifiedIdJoinIndexImplV2 ======================================================================== [JoinIndex Improvement][4/x] Implement PostingListJoinDataAccessor ======================================================================== [JoinIndex Improvement][3/x] Implement PostingListJoinDataSerializer and DocumentIdToJoinInfo data type ======================================================================== [JoinIndex Improvement][2/x] Create NamespaceFingerprintIdentifier ======================================================================== [JoinIndex Improvement][1/x] Implement namespace_id_old_to_new in Compaction ======================================================================== Update test to also handle ICU 74 segmentation rules. 
======================================================================== [Icing][Expand QueryStats][3/x] Add new fields into QueryStats (1) ======================================================================== [Icing][Expand QueryStats][2/x] Refactor QueryStatsProto ======================================================================== [Icing][Expand QueryStats][1/x] Publish DocHitInfoIterator CallStats ======================================================================== Add additional property filter tests ======================================================================== Deprecate hit_intersect_section_ids_mask in DocHitInfoIterator ======================================================================== Change default requires_full_emulation to False for portable_cc_test (third_party/icing/testing) ======================================================================== Cleanup Set requires_full_emulation to True for selective tests ======================================================================== Fix monkey test failures ======================================================================== Complete monkey test logic to change schema during monkey test runtime ======================================================================== Refactor monkey test to prepare for schema update ======================================================================== Fix the schema bug found by monkey test with seed 2551429844 ======================================================================== Move set query stats to the very top of InternalSearch() ======================================================================== Apply section restriction only on leaf nodes ======================================================================== [6/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Advanced query parser) ======================================================================== [5/n] Fix callsites in Icing that forgot 
to check libtextclassifier3::Status (PersistentHashMap) ======================================================================== [4/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListIntegerIndexSerializer) ======================================================================== [3/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListHitSerializer) ======================================================================== [2/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Posting list storage) ======================================================================== [1/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Non-functional changes) ======================================================================== Decouple section restriction data from iterators ======================================================================== Fix the crash when a schema type gets more indexable properties than allowed ======================================================================== Add a checker to verify the property data type matches the schema. ======================================================================== Change global std::string in i18n-utils to constexpr std::string_view. ======================================================================== Adjust LiteIndex sort at indexing check conditions. ======================================================================== Bug: 305098009 Bug: 307508735 Bug: 291130542 Bug: 275121148 Bug: 303239901 Bug: 301116242 Bug: 299321977 Bug: 300135897 Bug: 297549761 Bug: 309826655 Bug: 296349369 Bug: 302192690 Bug: 302609704 Bug: 301566713 NO_IFTTT="False Alarm: The path is only valid in G3. kVersion is changed to 3, and schema is compatible with version 1." 
Change-Id: I8c4c3cd9b93e5240bd774f0a3d6d812f7a9ec198 --- .../doc-hit-info-iterator-all-document-id.cc | 1 - .../doc-hit-info-iterator-all-document-id.h | 12 +- .../doc-hit-info-iterator-all-document-id_test.cc | 44 ++-- icing/index/iterator/doc-hit-info-iterator-and.cc | 34 +-- icing/index/iterator/doc-hit-info-iterator-and.h | 26 ++- .../iterator/doc-hit-info-iterator-and_test.cc | 154 ++++++-------- .../index/iterator/doc-hit-info-iterator-filter.cc | 11 - .../index/iterator/doc-hit-info-iterator-filter.h | 7 +- .../iterator/doc-hit-info-iterator-filter_test.cc | 24 +-- icing/index/iterator/doc-hit-info-iterator-none.h | 4 +- icing/index/iterator/doc-hit-info-iterator-not.cc | 14 +- icing/index/iterator/doc-hit-info-iterator-not.h | 13 +- .../iterator/doc-hit-info-iterator-not_test.cc | 45 ++-- icing/index/iterator/doc-hit-info-iterator-or.cc | 35 +--- icing/index/iterator/doc-hit-info-iterator-or.h | 23 ++- .../iterator/doc-hit-info-iterator-or_test.cc | 156 +++++++------- .../doc-hit-info-iterator-property-in-document.cc | 65 ++++++ .../doc-hit-info-iterator-property-in-document.h | 73 +++++++ .../doc-hit-info-iterator-property-in-schema.cc | 12 -- .../doc-hit-info-iterator-property-in-schema.h | 7 +- ...oc-hit-info-iterator-property-in-schema_test.cc | 5 +- .../doc-hit-info-iterator-section-restrict.cc | 230 ++++++++++----------- .../doc-hit-info-iterator-section-restrict.h | 100 ++++----- .../doc-hit-info-iterator-section-restrict_test.cc | 230 +++++++++++---------- .../iterator/doc-hit-info-iterator-test-util.h | 26 +-- icing/index/iterator/doc-hit-info-iterator.h | 148 +++++++++++-- icing/index/iterator/section-restrict-data.cc | 82 ++++++++ icing/index/iterator/section-restrict-data.h | 98 +++++++++ 28 files changed, 994 insertions(+), 685 deletions(-) create mode 100644 icing/index/iterator/doc-hit-info-iterator-property-in-document.cc create mode 100644 icing/index/iterator/doc-hit-info-iterator-property-in-document.h create mode 100644 
icing/index/iterator/section-restrict-data.cc create mode 100644 icing/index/iterator/section-restrict-data.h (limited to 'icing/index/iterator') diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc index 67c7d25..1917fd0 100644 --- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc +++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc @@ -32,7 +32,6 @@ libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() { if (!IsDocumentIdValid(current_document_id_)) { // Reached the end, set these to invalid values and return doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h index bb16eaf..60c5e0c 100644 --- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h +++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h @@ -37,10 +37,16 @@ class DocHitInfoIteratorAllDocumentId : public DocHitInfoIterator { libtextclassifier3::StatusOr TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override { return 0; } + void MapChildren(const ChildrenMapper& mapper) override {} - int32_t GetNumLeafAdvanceCalls() const override { - return document_id_limit_ - current_document_id_; + CallStats GetCallStats() const override { + return CallStats( + /*num_leaf_advance_calls_lite_index_in=*/0, + /*num_leaf_advance_calls_main_index_in=*/0, + /*num_leaf_advance_calls_integer_index_in=*/0, + /*num_leaf_advance_calls_no_index_in=*/document_id_limit_ - + current_document_id_, + /*num_blocks_inspected_in=*/0); } std::string ToString() const override { diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc 
b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc index ea2dda6..379cb4d 100644 --- a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc @@ -41,9 +41,8 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) { // We'll always start with an invalid document_id, need to Advance before we // get anything out of this. - EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(kInvalidDocumentId)); - EXPECT_THAT(all_it.hit_intersect_section_ids_mask(), - Eq(kSectionIdMaskNone)); + EXPECT_THAT(all_it.doc_hit_info(), + EqualsDocHitInfo(kInvalidDocumentId, std::vector{})); } { @@ -54,26 +53,25 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) { } } -TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumBlocksInspected) { +TEST(DocHitInfoIteratorAllDocumentIdTest, GetCallStats) { DocHitInfoIteratorAllDocumentId all_it(100); - EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0)); - - // Number of iterations is chosen arbitrarily. Just meant to demonstrate that - // no matter how many Advance calls are made, GetNumBlocksInspected should - // always return 0. 
- for (int i = 0; i < 5; ++i) { - EXPECT_THAT(all_it.Advance(), IsOk()); - EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0)); - } -} - -TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumLeafAdvanceCalls) { - DocHitInfoIteratorAllDocumentId all_it(100); - EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(0)); + EXPECT_THAT( + all_it.GetCallStats(), + EqualsDocHitInfoIteratorCallStats( + /*num_leaf_advance_calls_lite_index=*/0, + /*num_leaf_advance_calls_main_index=*/0, + /*num_leaf_advance_calls_integer_index=*/0, + /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0)); for (int i = 1; i <= 5; ++i) { EXPECT_THAT(all_it.Advance(), IsOk()); - EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(i)); + EXPECT_THAT( + all_it.GetCallStats(), + EqualsDocHitInfoIteratorCallStats( + /*num_leaf_advance_calls_lite_index=*/0, + /*num_leaf_advance_calls_main_index=*/0, + /*num_leaf_advance_calls_integer_index=*/0, + /*num_leaf_advance_calls_no_index=*/i, /*num_blocks_inspected=*/0)); } } @@ -87,12 +85,8 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) { // Test one advance DocHitInfoIteratorAllDocumentId all_it(5); EXPECT_THAT(all_it.Advance(), IsOk()); - EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(5)); - - // Advancing shouldn't affect the intersect section ids mask, since there's - // no intersecting going on - EXPECT_THAT(all_it.hit_intersect_section_ids_mask(), - Eq(kSectionIdMaskNone)); + EXPECT_THAT(all_it.doc_hit_info(), + EqualsDocHitInfo(5, std::vector{})); } { diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc index 185a35e..249bd0e 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and.cc @@ -83,7 +83,6 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() { // Didn't find anything for the first iterator, reset to invalid values and // return. 
doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } @@ -106,8 +105,6 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() { // Guaranteed that short_doc_id and long_doc_id match now doc_hit_info_ = short_->doc_hit_info(); doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask()); - hit_intersect_section_ids_mask_ = short_->hit_intersect_section_ids_mask() & - long_->hit_intersect_section_ids_mask(); return libtextclassifier3::Status::OK; } @@ -124,14 +121,6 @@ DocHitInfoIteratorAnd::TrimRightMostNode() && { return trimmed_long; } -int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const { - return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorAnd::GetNumLeafAdvanceCalls() const { - return short_->GetNumLeafAdvanceCalls() + long_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorAnd::ToString() const { return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(), ")"); @@ -152,7 +141,6 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { // Didn't find anything for the first iterator, reset to invalid values and // return doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } @@ -196,14 +184,10 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { // Found a DocumentId which exists in all the iterators doc_hit_info_ = iterators_.at(0)->doc_hit_info(); - hit_intersect_section_ids_mask_ = - iterators_.at(0)->hit_intersect_section_ids_mask(); for (size_t i = 1; i < iterators_.size(); i++) { doc_hit_info_.MergeSectionsFrom( iterators_.at(i)->doc_hit_info().hit_section_ids_mask()); - hit_intersect_section_ids_mask_ &= - 
iterators_.at(i)->hit_intersect_section_ids_mask(); } return libtextclassifier3::Status::OK; } @@ -229,20 +213,12 @@ DocHitInfoIteratorAndNary::TrimRightMostNode() && { return trimmed_right; } -int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const { - int32_t blockCount = 0; - for (const std::unique_ptr& iter : iterators_) { - blockCount += iter->GetNumBlocksInspected(); - } - return blockCount; -} - -int32_t DocHitInfoIteratorAndNary::GetNumLeafAdvanceCalls() const { - int32_t leafCount = 0; - for (const std::unique_ptr& iter : iterators_) { - leafCount += iter->GetNumLeafAdvanceCalls(); +DocHitInfoIterator::CallStats DocHitInfoIteratorAndNary::GetCallStats() const { + CallStats call_stats; + for (const auto& iter : iterators_) { + call_stats += iter->GetCallStats(); } - return leafCount; + return call_stats; } std::string DocHitInfoIteratorAndNary::ToString() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h index 0f40f94..8c52ac9 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.h +++ b/icing/index/iterator/doc-hit-info-iterator-and.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "icing/text_classifier/lib3/utils/base/status.h" @@ -42,14 +43,19 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator { libtextclassifier3::StatusOr TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override; - - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override { + return short_->GetCallStats() + long_->GetCallStats(); + } std::string ToString() const override; + void MapChildren(const ChildrenMapper& mapper) override { + short_ = mapper(std::move(short_)); + long_ = mapper(std::move(long_)); + } + void PopulateMatchedTermsStats( - std::vector *matched_terms_stats, + std::vector* matched_terms_stats, SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if 
(doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. @@ -78,14 +84,18 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator { libtextclassifier3::StatusOr TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override; - - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override; std::string ToString() const override; + void MapChildren(const ChildrenMapper& mapper) override { + for (int i = 0; i < iterators_.size(); ++i) { + iterators_[i] = mapper(std::move(iterators_[i])); + } + } + void PopulateMatchedTermsStats( - std::vector *matched_terms_stats, + std::vector* matched_terms_stats, SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc index 51828cb..f204ada 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc @@ -74,39 +74,33 @@ TEST(DocHitInfoIteratorAndTest, Initialize) { std::make_unique()); // We start out with invalid values - EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId))); - EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), - Eq(kSectionIdMaskNone)); + EXPECT_THAT(and_iter.doc_hit_info(), + EqualsDocHitInfo(kInvalidDocumentId, std::vector{})); } -TEST(DocHitInfoIteratorAndTest, GetNumBlocksInspected) { - int first_iter_blocks = 4; // arbitrary value +TEST(DocHitInfoIteratorAndTest, GetCallStats) { + DocHitInfoIterator::CallStats first_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + /*num_blocks_inspected_in=*/4); // arbitrary value auto first_iter = 
std::make_unique(); - first_iter->SetNumBlocksInspected(first_iter_blocks); - - int second_iter_blocks = 7; // arbitrary value + first_iter->SetCallStats(first_iter_call_stats); + + DocHitInfoIterator::CallStats second_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/6, + /*num_leaf_advance_calls_main_index_in=*/2, + /*num_leaf_advance_calls_integer_index_in=*/10, + /*num_leaf_advance_calls_no_index_in=*/3, + /*num_blocks_inspected_in=*/7); // arbitrary value auto second_iter = std::make_unique(); - second_iter->SetNumBlocksInspected(second_iter_blocks); + second_iter->SetCallStats(second_iter_call_stats); DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter)); - EXPECT_THAT(and_iter.GetNumBlocksInspected(), - Eq(first_iter_blocks + second_iter_blocks)); -} - -TEST(DocHitInfoIteratorAndTest, GetNumLeafAdvanceCalls) { - int first_iter_leaves = 4; // arbitrary value - auto first_iter = std::make_unique(); - first_iter->SetNumLeafAdvanceCalls(first_iter_leaves); - - int second_iter_leaves = 7; // arbitrary value - auto second_iter = std::make_unique(); - second_iter->SetNumLeafAdvanceCalls(second_iter_leaves); - - DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter)); - - EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(), - Eq(first_iter_leaves + second_iter_leaves)); + EXPECT_THAT(and_iter.GetCallStats(), + Eq(first_iter_call_stats + second_iter_call_stats)); } TEST(DocHitInfoIteratorAndTest, AdvanceNoOverlap) { @@ -293,24 +287,22 @@ TEST(DocHitInfoIteratorAndTest, SectionIdMask) { // Created to test correct section_id_mask behavior. 
SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6 SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2 - SectionIdMask mask_anded_result = 0b00000100; SectionIdMask mask_ored_result = 0b01010111; std::vector first_vector = {DocHitInfo(4, section_id_mask1)}; std::vector second_vector = {DocHitInfo(4, section_id_mask2)}; auto first_iter = std::make_unique(first_vector); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter)); ICING_EXPECT_OK(and_iter.Advance()); EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(), Eq(mask_ored_result)); - EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result)); } TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) { @@ -340,11 +332,11 @@ TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) { auto first_iter = std::make_unique(first_vector, "hi"); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector, "hello"); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter)); @@ -380,11 +372,11 @@ TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) { auto first_iter = std::make_unique(first_vector, "hi"); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector, "hi"); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + 
second_iter->set_hit_section_ids_mask(section_id_mask1); DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter)); @@ -435,9 +427,8 @@ TEST(DocHitInfoIteratorAndNaryTest, Initialize) { DocHitInfoIteratorAndNary and_iter(std::move(iterators)); // We start out with invalid values - EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId))); - EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), - Eq(kSectionIdMaskNone)); + EXPECT_THAT(and_iter.doc_hit_info(), + EqualsDocHitInfo(kInvalidDocumentId, std::vector{})); } TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) { @@ -450,51 +441,42 @@ TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(DocHitInfoIteratorAndNaryTest, GetNumBlocksInspected) { - int first_iter_blocks = 4; // arbitrary value +TEST(DocHitInfoIteratorAndNaryTest, GetCallStats) { + DocHitInfoIterator::CallStats first_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + /*num_blocks_inspected_in=*/4); // arbitrary value auto first_iter = std::make_unique(); - first_iter->SetNumBlocksInspected(first_iter_blocks); - - int second_iter_blocks = 7; // arbitrary value + first_iter->SetCallStats(first_iter_call_stats); + + DocHitInfoIterator::CallStats second_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/6, + /*num_leaf_advance_calls_main_index_in=*/2, + /*num_leaf_advance_calls_integer_index_in=*/10, + /*num_leaf_advance_calls_no_index_in=*/3, + /*num_blocks_inspected_in=*/7); // arbitrary value auto second_iter = std::make_unique(); - second_iter->SetNumBlocksInspected(second_iter_blocks); - - int third_iter_blocks = 13; // arbitrary value + second_iter->SetCallStats(second_iter_call_stats); + + DocHitInfoIterator::CallStats third_iter_call_stats( + 
/*num_leaf_advance_calls_lite_index_in=*/1000, + /*num_leaf_advance_calls_main_index_in=*/2000, + /*num_leaf_advance_calls_integer_index_in=*/3000, + /*num_leaf_advance_calls_no_index_in=*/0, + /*num_blocks_inspected_in=*/200); // arbitrary value auto third_iter = std::make_unique(); - third_iter->SetNumBlocksInspected(third_iter_blocks); - - int fourth_iter_blocks = 1; // arbitrary value - auto fourth_iter = std::make_unique(); - fourth_iter->SetNumBlocksInspected(fourth_iter_blocks); - - std::vector> iterators; - iterators.push_back(std::move(first_iter)); - iterators.push_back(std::move(second_iter)); - iterators.push_back(std::move(third_iter)); - iterators.push_back(std::move(fourth_iter)); - DocHitInfoIteratorAndNary and_iter(std::move(iterators)); - - EXPECT_THAT(and_iter.GetNumBlocksInspected(), - Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks + - fourth_iter_blocks)); -} - -TEST(DocHitInfoIteratorAndNaryTest, GetNumLeafAdvanceCalls) { - int first_iter_leaves = 4; // arbitrary value - auto first_iter = std::make_unique(); - first_iter->SetNumLeafAdvanceCalls(first_iter_leaves); - - int second_iter_leaves = 7; // arbitrary value - auto second_iter = std::make_unique(); - second_iter->SetNumLeafAdvanceCalls(second_iter_leaves); - - int third_iter_leaves = 13; // arbitrary value - auto third_iter = std::make_unique(); - third_iter->SetNumLeafAdvanceCalls(third_iter_leaves); - - int fourth_iter_leaves = 13; // arbitrary value + third_iter->SetCallStats(third_iter_call_stats); + + DocHitInfoIterator::CallStats fourth_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/200, + /*num_leaf_advance_calls_main_index_in=*/400, + /*num_leaf_advance_calls_integer_index_in=*/100, + /*num_leaf_advance_calls_no_index_in=*/20, + /*num_blocks_inspected_in=*/50); // arbitrary value auto fourth_iter = std::make_unique(); - fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves); + fourth_iter->SetCallStats(fourth_iter_call_stats); std::vector> iterators; 
iterators.push_back(std::move(first_iter)); @@ -503,9 +485,9 @@ TEST(DocHitInfoIteratorAndNaryTest, GetNumLeafAdvanceCalls) { iterators.push_back(std::move(fourth_iter)); DocHitInfoIteratorAndNary and_iter(std::move(iterators)); - EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(), - Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves + - fourth_iter_leaves)); + EXPECT_THAT(and_iter.GetCallStats(), + Eq(first_iter_call_stats + second_iter_call_stats + + third_iter_call_stats + fourth_iter_call_stats)); } TEST(DocHitInfoIteratorAndNaryTest, Advance) { @@ -541,7 +523,6 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) { SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2 SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3 SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5 - SectionIdMask mask_anded_result = 0b00000100; SectionIdMask mask_ored_result = 0b01101111; std::vector first_vector = {DocHitInfo(4, section_id_mask1)}; @@ -550,16 +531,16 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) { std::vector fourth_vector = {DocHitInfo(4, section_id_mask4)}; auto first_iter = std::make_unique(first_vector); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); auto third_iter = std::make_unique(third_vector); - third_iter->set_hit_intersect_section_ids_mask(section_id_mask3); + third_iter->set_hit_section_ids_mask(section_id_mask3); auto fourth_iter = std::make_unique(fourth_vector); - fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4); + fourth_iter->set_hit_section_ids_mask(section_id_mask4); std::vector> iterators; iterators.push_back(std::move(first_iter)); @@ -572,7 +553,6 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) { 
ICING_EXPECT_OK(and_iter.Advance()); EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(), Eq(mask_ored_result)); - EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result)); } TEST(DocHitInfoIteratorAndNaryTest, PopulateMatchedTermsStats) { diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc index a82e556..82d1ac7 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc @@ -100,14 +100,11 @@ libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() { // Satisfied all our specified filters doc_hit_info_ = delegate_->doc_hit_info(); - hit_intersect_section_ids_mask_ = - delegate_->hit_intersect_section_ids_mask(); return libtextclassifier3::Status::OK; } // Didn't find anything on the delegate iterator. doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } @@ -123,14 +120,6 @@ DocHitInfoIteratorFilter::TrimRightMostNode() && { return trimmed_delegate; } -int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const { - return delegate_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorFilter::GetNumLeafAdvanceCalls() const { - return delegate_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorFilter::ToString() const { return delegate_->ToString(); } diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h index be5e1e8..608665e 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.h +++ b/icing/index/iterator/doc-hit-info-iterator-filter.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "icing/text_classifier/lib3/utils/base/status.h" @@ -62,9 +63,11 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator { libtextclassifier3::StatusOr 
TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override; + void MapChildren(const ChildrenMapper& mapper) override { + delegate_ = mapper(std::move(delegate_)); + } - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override { return delegate_->GetCallStats(); } std::string ToString() const override; diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc index d93fd02..0ed4d02 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc @@ -1001,28 +1001,22 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) { EqualsDocHitInfo(document_id3, section_ids3))); } -TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) { +TEST_F(DocHitInfoIteratorFilterTest, GetCallStats) { + DocHitInfoIterator::CallStats original_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + /*num_blocks_inspected_in=*/4); // arbitrary value auto original_iterator = std::make_unique(); - original_iterator->SetNumBlocksInspected(5); + original_iterator->SetCallStats(original_call_stats); DocHitInfoIteratorFilter::Options options; DocHitInfoIteratorFilter filtered_iterator( std::move(original_iterator), document_store_.get(), schema_store_.get(), options, fake_clock_.GetSystemTimeMilliseconds()); - EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5)); -} - -TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) { - auto original_iterator = std::make_unique(); - original_iterator->SetNumLeafAdvanceCalls(6); - - DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - options, 
fake_clock_.GetSystemTimeMilliseconds()); - - EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6)); + EXPECT_THAT(filtered_iterator.GetCallStats(), Eq(original_call_stats)); } TEST_F(DocHitInfoIteratorFilterTest, TrimFilterIterator) { diff --git a/icing/index/iterator/doc-hit-info-iterator-none.h b/icing/index/iterator/doc-hit-info-iterator-none.h index f938d32..c2853f1 100644 --- a/icing/index/iterator/doc-hit-info-iterator-none.h +++ b/icing/index/iterator/doc-hit-info-iterator-none.h @@ -39,9 +39,9 @@ class DocHitInfoIteratorNone : public DocHitInfoIterator { return node; } - int32_t GetNumBlocksInspected() const override { return 0; } + void MapChildren(const ChildrenMapper& mapper) override {} - int32_t GetNumLeafAdvanceCalls() const override { return 0; } + CallStats GetCallStats() const override { return CallStats(); } std::string ToString() const override { return "(NONE)"; } }; diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc index 38b1ded..10a8292 100644 --- a/icing/index/iterator/doc-hit-info-iterator-not.cc +++ b/icing/index/iterator/doc-hit-info-iterator-not.cc @@ -15,13 +15,15 @@ #include "icing/index/iterator/doc-hit-info-iterator-not.h" #include +#include +#include #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h" -#include "icing/schema/section.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/store/document-id.h" namespace icing { @@ -67,14 +69,8 @@ DocHitInfoIteratorNot::TrimRightMostNode() && { "Cannot generate suggestion if the last term is NOT operator."); } -int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const { - return to_be_excluded_->GetNumBlocksInspected() + - all_document_id_iterator_.GetNumBlocksInspected(); -} - -int32_t 
DocHitInfoIteratorNot::GetNumLeafAdvanceCalls() const { - return to_be_excluded_->GetNumLeafAdvanceCalls() + - all_document_id_iterator_.GetNumLeafAdvanceCalls(); +void DocHitInfoIteratorNot::MapChildren(const ChildrenMapper& mapper) { + to_be_excluded_ = mapper(std::move(to_be_excluded_)); } std::string DocHitInfoIteratorNot::ToString() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-not.h b/icing/index/iterator/doc-hit-info-iterator-not.h index 8cc3bf3..11575fb 100644 --- a/icing/index/iterator/doc-hit-info-iterator-not.h +++ b/icing/index/iterator/doc-hit-info-iterator-not.h @@ -30,14 +30,12 @@ namespace lib { // Iterator that will return all documents that are *not* specified by the // to_be_excluded_iterator. // -// NOTE: The hit_intersect_section_ids_mask is meaningless for this iterator. +// NOTE: doc_hit_info_.hit_section_ids_mask() is meaningless for this iterator. // When this iterator produces a result, it's because the Document was not // present in the to_be_excluded_iterator. There is no concept of the Document // having been chosen because it's term was in a specific section. Since we // don't know anything about the sections for the Document, the -// hit_intersect_section_ids_mask is always kSectionIdMaskNone. Correspondingly, -// this means that the doc_hit_info.hit_section_ids_mask will also always be -// kSectionIdMaskNone. +// doc_hit_info.hit_section_ids_mask() is always kSectionIdMaskNone. class DocHitInfoIteratorNot : public DocHitInfoIterator { public: // to_be_excluded_iterator: The results of this iterator will be excluded @@ -55,9 +53,12 @@ class DocHitInfoIteratorNot : public DocHitInfoIterator { // to NOT operator. 
libtextclassifier3::StatusOr TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override; + void MapChildren(const ChildrenMapper& mapper) override; - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override { + return to_be_excluded_->GetCallStats() + + all_document_id_iterator_.GetCallStats(); + } std::string ToString() const override; diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc index 5a8ce2c..a8c835f 100644 --- a/icing/index/iterator/doc-hit-info-iterator-not_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc @@ -102,40 +102,39 @@ TEST(DocHitInfoIteratorNotTest, AllDocumentIdOverlapOk) { EXPECT_THAT(GetDocumentIds(&not_iterator), IsEmpty()); } -TEST(DocHitInfoIteratorNotTest, GetNumBlocksInspected) { - int to_be_excluded_iterator_blocks = 4; // arbitrary value +TEST(DocHitInfoIteratorNotTest, GetCallStats) { + DocHitInfoIterator::CallStats to_be_excluded_iterator_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + /*num_blocks_inspected_in=*/4); // arbitrary value auto to_be_excluded_iterator = std::make_unique(); - to_be_excluded_iterator->SetNumBlocksInspected( - to_be_excluded_iterator_blocks); - - DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator), - /*document_id_limit=*/5); - - // The AllDocumentId iterator doesn't count any blocks as being inspected - // since it's just decrementing 1 from the document_id_limit.
- EXPECT_THAT(not_iterator.GetNumBlocksInspected(), - Eq(to_be_excluded_iterator_blocks)); -} - -TEST(DocHitInfoIteratorNotTest, GetNumLeafAdvanceCalls) { - int to_be_excluded_iterator_leaves = 4; // arbitrary value - auto to_be_excluded_iterator = std::make_unique(); - to_be_excluded_iterator->SetNumLeafAdvanceCalls( - to_be_excluded_iterator_leaves); + to_be_excluded_iterator->SetCallStats(to_be_excluded_iterator_call_stats); int all_document_id_limit = 5; // Since we iterate from [limit, 0] inclusive, add 1 for the 0th advance call int all_leaf_advance_calls = all_document_id_limit + 1; DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator), - all_document_id_limit); + /*document_id_limit=*/5); while (not_iterator.Advance().ok()) { // Advance through the whole not iterator } - // The AllDocumentId iterator counts each DocumentId as a leaf advance call - EXPECT_THAT(not_iterator.GetNumLeafAdvanceCalls(), - Eq(to_be_excluded_iterator_leaves + all_leaf_advance_calls)); + // The AllDocumentId iterator doesn't count lite/main/integer index or blocks + // as being inspected since it's just decrementing 1 from the + // document_id_limit. 
+ EXPECT_THAT( + not_iterator.GetCallStats(), + EqualsDocHitInfoIteratorCallStats( + to_be_excluded_iterator_call_stats.num_leaf_advance_calls_lite_index, + to_be_excluded_iterator_call_stats.num_leaf_advance_calls_main_index, + to_be_excluded_iterator_call_stats + .num_leaf_advance_calls_integer_index, + to_be_excluded_iterator_call_stats.num_leaf_advance_calls_no_index + + all_leaf_advance_calls, + to_be_excluded_iterator_call_stats.num_blocks_inspected)); } TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) { diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc index 8f7b84f..6251365 100644 --- a/icing/index/iterator/doc-hit-info-iterator-or.cc +++ b/icing/index/iterator/doc-hit-info-iterator-or.cc @@ -20,6 +20,7 @@ #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/index/hit/doc-hit-info.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/store/document-id.h" #include "icing/util/status-macros.h" @@ -113,7 +114,6 @@ libtextclassifier3::Status DocHitInfoIteratorOr::Advance() { right_document_id_ == kInvalidDocumentId) { // Reached the end, set these to invalid values and return doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } @@ -132,26 +132,16 @@ libtextclassifier3::Status DocHitInfoIteratorOr::Advance() { current_ = chosen; doc_hit_info_ = chosen->doc_hit_info(); - hit_intersect_section_ids_mask_ = chosen->hit_intersect_section_ids_mask(); // If equal, combine. 
if (left_document_id_ == right_document_id_) { doc_hit_info_.MergeSectionsFrom( right_->doc_hit_info().hit_section_ids_mask()); - hit_intersect_section_ids_mask_ &= right_->hit_intersect_section_ids_mask(); } return libtextclassifier3::Status::OK; } -int32_t DocHitInfoIteratorOr::GetNumBlocksInspected() const { - return left_->GetNumBlocksInspected() + right_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorOr::GetNumLeafAdvanceCalls() const { - return left_->GetNumLeafAdvanceCalls() + right_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorOr::ToString() const { return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(), ")"); @@ -192,7 +182,6 @@ libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() { // 0 is the smallest (last) DocumentId, can't advance further. Reset to // invalid values and return directly doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } @@ -222,45 +211,31 @@ libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() { // None of the iterators had a next document_id, reset to invalid values and // return doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } // Found the next hit DocumentId, now calculate the section info. 
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone; for (const auto& iterator : iterators_) { if (iterator->doc_hit_info().document_id() == next_document_id) { current_iterators_.push_back(iterator.get()); if (doc_hit_info_.document_id() == kInvalidDocumentId) { doc_hit_info_ = iterator->doc_hit_info(); - hit_intersect_section_ids_mask_ = - iterator->hit_intersect_section_ids_mask(); } else { doc_hit_info_.MergeSectionsFrom( iterator->doc_hit_info().hit_section_ids_mask()); - hit_intersect_section_ids_mask_ &= - iterator->hit_intersect_section_ids_mask(); } } } return libtextclassifier3::Status::OK; } -int32_t DocHitInfoIteratorOrNary::GetNumBlocksInspected() const { - int32_t blockCount = 0; - for (const auto& iter : iterators_) { - blockCount += iter->GetNumBlocksInspected(); - } - return blockCount; -} - -int32_t DocHitInfoIteratorOrNary::GetNumLeafAdvanceCalls() const { - int32_t leafCount = 0; +DocHitInfoIterator::CallStats DocHitInfoIteratorOrNary::GetCallStats() const { + CallStats call_stats; for (const auto& iter : iterators_) { - leafCount += iter->GetNumLeafAdvanceCalls(); + call_stats += iter->GetCallStats(); } - return leafCount; + return call_stats; } std::string DocHitInfoIteratorOrNary::ToString() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h index 1e9847d..8c0427b 100644 --- a/icing/index/iterator/doc-hit-info-iterator-or.h +++ b/icing/index/iterator/doc-hit-info-iterator-or.h @@ -16,7 +16,9 @@ #define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_OR_H_ #include +#include #include +#include #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -38,12 +40,17 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator { libtextclassifier3::Status Advance() override; - int32_t GetNumBlocksInspected() const override; - - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override { + return left_->GetCallStats() + right_->GetCallStats(); + } 
std::string ToString() const override; + void MapChildren(const ChildrenMapper &mapper) override { + left_ = mapper(std::move(left_)); + right_ = mapper(std::move(right_)); + } + void PopulateMatchedTermsStats( std::vector *matched_terms_stats, SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { @@ -83,12 +90,16 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator { libtextclassifier3::Status Advance() override; - int32_t GetNumBlocksInspected() const override; - - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override; std::string ToString() const override; + void MapChildren(const ChildrenMapper &mapper) override { + for (int i = 0; i < iterators_.size(); ++i) { + iterators_[i] = mapper(std::move(iterators_[i])); + } + } + void PopulateMatchedTermsStats( std::vector *matched_terms_stats, SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc index 1950c01..d198b53 100644 --- a/icing/index/iterator/doc-hit-info-iterator-or_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc @@ -73,38 +73,33 @@ TEST(DocHitInfoIteratorOrTest, Initialize) { std::make_unique()); // We start out with invalid values - EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId))); - EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone)); + EXPECT_THAT(or_iter.doc_hit_info(), + EqualsDocHitInfo(kInvalidDocumentId, std::vector{})); } -TEST(DocHitInfoIteratorOrTest, GetNumBlocksInspected) { - int first_iter_blocks = 4; // arbitrary value +TEST(DocHitInfoIteratorOrTest, GetCallStats) { + DocHitInfoIterator::CallStats first_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + 
/*num_blocks_inspected_in=*/4); // arbitrary value auto first_iter = std::make_unique(); - first_iter->SetNumBlocksInspected(first_iter_blocks); - - int second_iter_blocks = 7; // arbitrary value + first_iter->SetCallStats(first_iter_call_stats); + + DocHitInfoIterator::CallStats second_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/6, + /*num_leaf_advance_calls_main_index_in=*/2, + /*num_leaf_advance_calls_integer_index_in=*/10, + /*num_leaf_advance_calls_no_index_in=*/3, + /*num_blocks_inspected_in=*/7); // arbitrary value auto second_iter = std::make_unique(); - second_iter->SetNumBlocksInspected(second_iter_blocks); + second_iter->SetCallStats(second_iter_call_stats); DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter)); - EXPECT_THAT(or_iter.GetNumBlocksInspected(), - Eq(first_iter_blocks + second_iter_blocks)); -} - -TEST(DocHitInfoIteratorOrTest, GetNumLeafAdvanceCalls) { - int first_iter_leaves = 4; // arbitrary value - auto first_iter = std::make_unique(); - first_iter->SetNumLeafAdvanceCalls(first_iter_leaves); - - int second_iter_leaves = 7; // arbitrary value - auto second_iter = std::make_unique(); - second_iter->SetNumLeafAdvanceCalls(second_iter_leaves); - - DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter)); - - EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(), - Eq(first_iter_leaves + second_iter_leaves)); + EXPECT_THAT(or_iter.GetCallStats(), + Eq(first_iter_call_stats + second_iter_call_stats)); } TEST(DocHitInfoIteratorOrTest, Advance) { @@ -155,24 +150,22 @@ TEST(DocHitInfoIteratorOrTest, SectionIdMask) { // Created to test correct section_id_mask behavior. 
SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6 SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2 - SectionIdMask mask_anded_result = 0b00000100; SectionIdMask mask_ored_result = 0b01010111; std::vector first_vector = {DocHitInfo(4, section_id_mask1)}; std::vector second_vector = {DocHitInfo(4, section_id_mask2)}; auto first_iter = std::make_unique(first_vector); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter)); ICING_EXPECT_OK(or_iter.Advance()); EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(), Eq(mask_ored_result)); - EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result)); } TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) { @@ -200,11 +193,11 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) { auto first_iter = std::make_unique(first_vector, "hi"); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector, "hello"); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter)); std::vector matched_terms_stats; @@ -238,11 +231,11 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) { auto first_iter = std::make_unique(first_vector, "hi"); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector, "hi"); - 
second_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + second_iter->set_hit_section_ids_mask(section_id_mask1); DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter)); std::vector matched_terms_stats; @@ -281,11 +274,11 @@ TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) { auto first_iter = std::make_unique(first_vector, "hi"); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector, "hello"); - second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter)); std::vector matched_terms_stats; @@ -362,8 +355,8 @@ TEST(DocHitInfoIteratorOrNaryTest, Initialize) { DocHitInfoIteratorOrNary or_iter(std::move(iterators)); // We start out with invalid values - EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId))); - EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone)); + EXPECT_THAT(or_iter.doc_hit_info(), + EqualsDocHitInfo(kInvalidDocumentId, std::vector{})); } TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) { @@ -376,51 +369,42 @@ TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(DocHitInfoIteratorOrNaryTest, GetNumBlocksInspected) { - int first_iter_blocks = 4; // arbitrary value +TEST(DocHitInfoIteratorOrNaryTest, GetCallStats) { + DocHitInfoIterator::CallStats first_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + /*num_blocks_inspected_in=*/4); // arbitrary value auto first_iter = std::make_unique(); - first_iter->SetNumBlocksInspected(first_iter_blocks); - - int second_iter_blocks = 7; // arbitrary 
value + first_iter->SetCallStats(first_iter_call_stats); + + DocHitInfoIterator::CallStats second_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/6, + /*num_leaf_advance_calls_main_index_in=*/2, + /*num_leaf_advance_calls_integer_index_in=*/10, + /*num_leaf_advance_calls_no_index_in=*/3, + /*num_blocks_inspected_in=*/7); // arbitrary value auto second_iter = std::make_unique(); - second_iter->SetNumBlocksInspected(second_iter_blocks); - - int third_iter_blocks = 13; // arbitrary value + second_iter->SetCallStats(second_iter_call_stats); + + DocHitInfoIterator::CallStats third_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/1000, + /*num_leaf_advance_calls_main_index_in=*/2000, + /*num_leaf_advance_calls_integer_index_in=*/3000, + /*num_leaf_advance_calls_no_index_in=*/0, + /*num_blocks_inspected_in=*/200); // arbitrary value auto third_iter = std::make_unique(); - third_iter->SetNumBlocksInspected(third_iter_blocks); - - int fourth_iter_blocks = 1; // arbitrary value - auto fourth_iter = std::make_unique(); - fourth_iter->SetNumBlocksInspected(fourth_iter_blocks); - - std::vector> iterators; - iterators.push_back(std::move(first_iter)); - iterators.push_back(std::move(second_iter)); - iterators.push_back(std::move(third_iter)); - iterators.push_back(std::move(fourth_iter)); - DocHitInfoIteratorOrNary or_iter(std::move(iterators)); - - EXPECT_THAT(or_iter.GetNumBlocksInspected(), - Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks + - fourth_iter_blocks)); -} - -TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) { - int first_iter_leaves = 4; // arbitrary value - auto first_iter = std::make_unique(); - first_iter->SetNumLeafAdvanceCalls(first_iter_leaves); - - int second_iter_leaves = 7; // arbitrary value - auto second_iter = std::make_unique(); - second_iter->SetNumLeafAdvanceCalls(second_iter_leaves); - - int third_iter_leaves = 13; // arbitrary value - auto third_iter = std::make_unique(); - 
third_iter->SetNumLeafAdvanceCalls(third_iter_leaves); - - int fourth_iter_leaves = 13; // arbitrary value + third_iter->SetCallStats(third_iter_call_stats); + + DocHitInfoIterator::CallStats fourth_iter_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/200, + /*num_leaf_advance_calls_main_index_in=*/400, + /*num_leaf_advance_calls_integer_index_in=*/100, + /*num_leaf_advance_calls_no_index_in=*/20, + /*num_blocks_inspected_in=*/50); // arbitrary value auto fourth_iter = std::make_unique(); - fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves); + fourth_iter->SetCallStats(fourth_iter_call_stats); std::vector> iterators; iterators.push_back(std::move(first_iter)); @@ -429,9 +413,9 @@ TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) { iterators.push_back(std::move(fourth_iter)); DocHitInfoIteratorOrNary or_iter(std::move(iterators)); - EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(), - Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves + - fourth_iter_leaves)); + EXPECT_THAT(or_iter.GetCallStats(), + Eq(first_iter_call_stats + second_iter_call_stats + + third_iter_call_stats + fourth_iter_call_stats)); } TEST(DocHitInfoIteratorOrNaryTest, Advance) { @@ -460,7 +444,6 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) { SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2 SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3 SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5 - SectionIdMask mask_anded_result = 0b00000100; SectionIdMask mask_ored_result = 0b01101111; std::vector first_vector = {DocHitInfo(4, section_id_mask1)}; @@ -469,16 +452,16 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) { std::vector fourth_vector = {DocHitInfo(4, section_id_mask4)}; auto first_iter = std::make_unique(first_vector); - first_iter->set_hit_intersect_section_ids_mask(section_id_mask1); + first_iter->set_hit_section_ids_mask(section_id_mask1); auto second_iter = std::make_unique(second_vector); - 
second_iter->set_hit_intersect_section_ids_mask(section_id_mask2); + second_iter->set_hit_section_ids_mask(section_id_mask2); auto third_iter = std::make_unique(third_vector); - third_iter->set_hit_intersect_section_ids_mask(section_id_mask3); + third_iter->set_hit_section_ids_mask(section_id_mask3); auto fourth_iter = std::make_unique(fourth_vector); - fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4); + fourth_iter->set_hit_section_ids_mask(section_id_mask4); std::vector> iterators; iterators.push_back(std::move(first_iter)); @@ -491,7 +474,6 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) { ICING_EXPECT_OK(or_iter.Advance()); EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(), Eq(mask_ored_result)); - EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result)); } TEST(DocHitInfoIteratorOrNaryTest, PopulateMatchedTermsStats) { diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc new file mode 100644 index 0000000..e6a1c67 --- /dev/null +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc @@ -0,0 +1,65 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/index/iterator/doc-hit-info-iterator-property-in-document.h" + +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/store/document-id.h" + +namespace icing { +namespace lib { + +DocHitInfoIteratorPropertyInDocument::DocHitInfoIteratorPropertyInDocument( + std::unique_ptr meta_hit_iterator) + : meta_hit_iterator_(std::move(meta_hit_iterator)) {} + +libtextclassifier3::Status DocHitInfoIteratorPropertyInDocument::Advance() { + while (meta_hit_iterator_->Advance().ok()) { + // Currently, the metadata hits added by PropertyExistenceIndexingHandler + // can only have a section id of 0, so the section mask has to be 1 << 0. + if (meta_hit_iterator_->doc_hit_info().hit_section_ids_mask() == (1 << 0)) { + doc_hit_info_ = meta_hit_iterator_->doc_hit_info(); + // Hits returned by "hasProperty" should not be associated with any + // section. + doc_hit_info_.set_hit_section_ids_mask(/*section_id_mask=*/0); + return libtextclassifier3::Status::OK; + } + } + + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); +} + +libtextclassifier3::StatusOr +DocHitInfoIteratorPropertyInDocument::TrimRightMostNode() && { + // Don't generate suggestion if the last operator is this custom function. 
+ return absl_ports::InvalidArgumentError( + "Cannot generate suggestion if the last term is hasProperty()."); +} + +std::string DocHitInfoIteratorPropertyInDocument::ToString() const { + return meta_hit_iterator_->ToString(); +} + +} // namespace lib +} // namespace icing diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-document.h b/icing/index/iterator/doc-hit-info-iterator-property-in-document.h new file mode 100644 index 0000000..bb2c97a --- /dev/null +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-document.h @@ -0,0 +1,73 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_ +#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_ + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/schema/section.h" +#include "icing/store/document-id.h" + +namespace icing { +namespace lib { + +// The iterator returned by the "hasProperty" function in advanced query that +// post-processes metadata hits added by PropertyExistenceIndexingHandler. +// Specifically, it filters out hits that are not recognized as metadata, and +// always set hit_section_ids_mask to 0. 
+// +// It is marked as a subclass of DocHitInfoLeafIterator because section +// restriction should not be passed down to meta_hit_iterator. +class DocHitInfoIteratorPropertyInDocument : public DocHitInfoLeafIterator { + public: + explicit DocHitInfoIteratorPropertyInDocument( + std::unique_ptr meta_hit_iterator); + + libtextclassifier3::Status Advance() override; + + libtextclassifier3::StatusOr TrimRightMostNode() && override; + + CallStats GetCallStats() const override { + return meta_hit_iterator_->GetCallStats(); + } + + std::string ToString() const override; + + void PopulateMatchedTermsStats( + std::vector* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { + if (doc_hit_info_.document_id() == kInvalidDocumentId) { + // Current hit isn't valid, return. + return; + } + meta_hit_iterator_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); + } + + private: + std::unique_ptr meta_hit_iterator_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_ diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc index 05778b0..8b98302 100644 --- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc @@ -45,7 +45,6 @@ DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema( libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() { doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; // Maps from SchemaTypeId to a bool indicating whether or not the type has // the requested property. 
@@ -77,9 +76,6 @@ libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() { if (valid_match) { doc_hit_info_ = delegate_->doc_hit_info(); - hit_intersect_section_ids_mask_ = - delegate_->hit_intersect_section_ids_mask(); - doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_); return libtextclassifier3::Status::OK; } @@ -98,14 +94,6 @@ DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && { "Cannot generate suggestion if the last term is hasPropertyDefined()."); } -int32_t DocHitInfoIteratorPropertyInSchema::GetNumBlocksInspected() const { - return delegate_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorPropertyInSchema::GetNumLeafAdvanceCalls() const { - return delegate_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorPropertyInSchema::ToString() const { return absl_ports::StrCat("(", absl_ports::StrJoin(target_properties_, ","), "): ", delegate_->ToString()); diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h index 730c497..c16a1c4 100644 --- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -45,9 +46,11 @@ class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator { libtextclassifier3::StatusOr TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override; + void MapChildren(const ChildrenMapper& mapper) override { + delegate_ = mapper(std::move(delegate_)); + } - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override { return delegate_->GetCallStats(); } std::string ToString() const override; diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc 
b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc index 47f5cc5..3f5a0a7 100644 --- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc @@ -199,8 +199,7 @@ TEST_F(DocHitInfoIteratorPropertyInSchemaTest, auto original_iterator = std::make_unique(doc_hit_infos, "hi"); - original_iterator->set_hit_intersect_section_ids_mask( - original_section_id_mask); + original_iterator->set_hit_section_ids_mask(original_section_id_mask); DocHitInfoIteratorPropertyInSchema property_defined_iterator( std::move(original_iterator), document_store_.get(), schema_store_.get(), @@ -218,7 +217,7 @@ TEST_F(DocHitInfoIteratorPropertyInSchemaTest, // The expected mask is the same as the original mask, since the iterator // should treat it as a pass-through. SectionIdMask expected_section_id_mask = original_section_id_mask; - EXPECT_EQ(property_defined_iterator.hit_intersect_section_ids_mask(), + EXPECT_EQ(property_defined_iterator.doc_hit_info().hit_section_ids_mask(), expected_section_id_mask); property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats); diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index b850a9b..35dc0b9 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -16,137 +16,142 @@ #include #include +#include #include #include +#include #include +#include #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/iterator/section-restrict-data.h" +#include 
"icing/proto/search.pb.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { +// An iterator that simply takes ownership of SectionRestrictData. +class SectionRestrictDataHolderIterator : public DocHitInfoIterator { + public: + explicit SectionRestrictDataHolderIterator( + std::unique_ptr delegate, + std::unique_ptr data) + : delegate_(std::move(delegate)), data_(std::move(data)) {} + + libtextclassifier3::Status Advance() override { + auto result = delegate_->Advance(); + doc_hit_info_ = delegate_->doc_hit_info(); + return result; + } + + libtextclassifier3::StatusOr TrimRightMostNode() && override { + ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate, + std::move(*delegate_).TrimRightMostNode()); + if (trimmed_delegate.iterator_ != nullptr) { + trimmed_delegate.iterator_ = + std::make_unique( + std::move(trimmed_delegate.iterator_), std::move(data_)); + } + return trimmed_delegate; + } + + void MapChildren(const ChildrenMapper& mapper) override { + delegate_ = mapper(std::move(delegate_)); + } + + CallStats GetCallStats() const override { return delegate_->GetCallStats(); } + + std::string ToString() const override { return delegate_->ToString(); } + + void PopulateMatchedTermsStats( + std::vector* matched_terms_stats, + SectionIdMask filtering_section_mask) const override { + return delegate_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); + } + + private: + std::unique_ptr delegate_; + std::unique_ptr data_; +}; + DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr delegate, + std::unique_ptr delegate, SectionRestrictData* data) + : delegate_(std::move(delegate)), data_(data) {} + +std::unique_ptr +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr iterator, 
const DocumentStore* document_store, const SchemaStore* schema_store, - std::set target_sections, int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - current_time_ms_(current_time_ms) { - type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] = + std::set target_sections, int64_t current_time_ms) { + std::unordered_map> type_property_filters; + type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] = std::move(target_sections); + auto data = std::make_unique( + document_store, schema_store, current_time_ms, type_property_filters); + std::unique_ptr result = + ApplyRestrictions(std::move(iterator), data.get()); + return std::make_unique(std::move(result), + std::move(data)); } -DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr delegate, +std::unique_ptr +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr iterator, const DocumentStore* document_store, const SchemaStore* schema_store, - const SearchSpecProto& search_spec, - int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - current_time_ms_(current_time_ms) { + const SearchSpecProto& search_spec, int64_t current_time_ms) { + std::unordered_map> type_property_filters; // TODO(b/294274922): Add support for polymorphism in type property filters. 
for (const TypePropertyMask& type_property_mask : - search_spec.type_property_filters()) { - type_property_filters_[type_property_mask.schema_type()] = + search_spec.type_property_filters()) { + type_property_filters[type_property_mask.schema_type()] = std::set(type_property_mask.paths().begin(), type_property_mask.paths().end()); } + auto data = std::make_unique( + document_store, schema_store, current_time_ms, type_property_filters); + std::unique_ptr result = + ApplyRestrictions(std::move(iterator), data.get()); + return std::make_unique(std::move(result), + std::move(data)); } -DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( - std::unique_ptr delegate, - const DocumentStore* document_store, const SchemaStore* schema_store, - std::unordered_map> - type_property_filters, - std::unordered_map type_property_masks, - int64_t current_time_ms) - : delegate_(std::move(delegate)), - document_store_(*document_store), - schema_store_(*schema_store), - current_time_ms_(current_time_ms), - type_property_filters_(std::move(type_property_filters)), - type_property_masks_(std::move(type_property_masks)) {} - -SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask( - const std::string& schema_type, - const std::set& target_sections) const { - SectionIdMask section_mask = kSectionIdMaskNone; - auto section_metadata_list_or = - schema_store_.GetSectionMetadata(schema_type); - if (!section_metadata_list_or.ok()) { - // The current schema doesn't have section metadata. 
- return kSectionIdMaskNone; - } - const std::vector* section_metadata_list = - section_metadata_list_or.ValueOrDie(); - for (const SectionMetadata& section_metadata : *section_metadata_list) { - if (target_sections.find(section_metadata.path) != - target_sections.end()) { - section_mask |= UINT64_C(1) << section_metadata.id; +std::unique_ptr +DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::unique_ptr iterator, SectionRestrictData* data) { + ChildrenMapper mapper; + mapper = [&data, &mapper](std::unique_ptr iterator) + -> std::unique_ptr { + if (iterator->is_leaf()) { + return std::make_unique( + std::move(iterator), data); + } else { + iterator->MapChildren(mapper); + return iterator; } - } - return section_mask; -} - -SectionIdMask DocHitInfoIteratorSectionRestrict:: - ComputeAndCacheSchemaTypeAllowedSectionsMask( - const std::string& schema_type) { - if (const auto type_property_mask_itr = - type_property_masks_.find(schema_type); - type_property_mask_itr != type_property_masks_.end()) { - return type_property_mask_itr->second; - } - - // Section id mask of schema_type is never calculated before, so - // calculate it here and put it into type_property_masks_. - // - If type property filters of schema_type or wildcard (*) are - // specified, then create a mask according to the filters. - // - Otherwise, create a mask to match all properties. 
- SectionIdMask new_section_id_mask = kSectionIdMaskAll; - if (const auto itr = type_property_filters_.find(schema_type); - itr != type_property_filters_.end()) { - // Property filters defined for given schema type - new_section_id_mask = GenerateSectionMask( - schema_type, itr->second); - } else if (const auto wildcard_itr = type_property_filters_.find( - std::string(SchemaStore::kSchemaTypeWildcard)); - wildcard_itr != type_property_filters_.end()) { - // Property filters defined for wildcard entry - new_section_id_mask = GenerateSectionMask( - schema_type, wildcard_itr->second); - } else { - // Do not cache the section mask if no property filters apply to this schema - // type to avoid taking up unnecessary space. - return kSectionIdMaskAll; - } - - type_property_masks_[schema_type] = new_section_id_mask; - return new_section_id_mask; + }; + return mapper(std::move(iterator)); } libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; while (delegate_->Advance().ok()) { DocumentId document_id = delegate_->doc_hit_info().document_id(); - SectionIdMask section_id_mask = - delegate_->doc_hit_info().hit_section_ids_mask(); - - auto data_optional = document_store_.GetAliveDocumentFilterData( - document_id, current_time_ms_); + auto data_optional = data_->document_store().GetAliveDocumentFilterData( + document_id, data_->current_time_ms()); if (!data_optional) { // Ran into some error retrieving information on this hit, skip continue; @@ -154,34 +159,35 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { // Guaranteed that the DocumentFilterData exists at this point SchemaTypeId schema_type_id = data_optional.value().schema_type_id(); - auto schema_type_or = schema_store_.GetSchemaType(schema_type_id); + auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id); if (!schema_type_or.ok()) { // Ran into error 
retrieving schema type, skip continue; } const std::string* schema_type = std::move(schema_type_or).ValueOrDie(); SectionIdMask allowed_sections_mask = - ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type); + data_->ComputeAllowedSectionsMask(*schema_type); // A hit can be in multiple sections at once, need to check which of the // section ids match the sections allowed by type_property_masks_. This can // be done by doing a bitwise and of the section_id_mask in the doc hit and // the allowed_sections_mask. - hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask; + SectionIdMask section_id_mask = + delegate_->doc_hit_info().hit_section_ids_mask() & + allowed_sections_mask; // Return this document if: // - the sectionIdMask is not empty after applying property filters, or // - no property filters apply for its schema type (allowed_sections_mask // == kSectionIdMaskAll). This is needed to ensure that in case of empty // query (which uses doc-hit-info-iterator-all-document-id), where - // section_id_mask (and hence hit_intersect_section_ids_mask_) is - // kSectionIdMaskNone, doc hits with no property restrictions don't get - // filtered out. Doc hits for schema types for whom property filters are - // specified will still get filtered out. - if (allowed_sections_mask == kSectionIdMaskAll - || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) { + // section_id_mask is kSectionIdMaskNone, doc hits with no property + // restrictions don't get filtered out. Doc hits for schema types for + // whom property filters are specified will still get filtered out. + if (allowed_sections_mask == kSectionIdMaskAll || + section_id_mask != kSectionIdMaskNone) { doc_hit_info_ = delegate_->doc_hit_info(); - doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_); + doc_hit_info_.set_hit_section_ids_mask(section_id_mask); return libtextclassifier3::Status::OK; } // Didn't find a matching section name for this hit. Continue. 
@@ -200,14 +206,14 @@ DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && { // type_property_filters_ when code flow reaches here. If the InternalError // below is returned, that means TrimRightMostNode hasn't been called in the // right context. - const auto it = type_property_filters_.find("*"); - if (it == type_property_filters_.end()) { + const auto it = data_->type_property_filters().find("*"); + if (it == data_->type_property_filters().end()) { return absl_ports::InternalError( "A wildcard entry should always be present in type property filters " "whenever TrimRightMostNode() is called for " "DocHitInfoIteratorSectionRestrict"); } - std::set& target_sections = it->second; + const std::set& target_sections = it->second; if (target_sections.empty()) { return absl_ports::InternalError( "Target sections should not be empty whenever TrimRightMostNode() is " @@ -222,24 +228,14 @@ DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && { trimmed_delegate.iterator_ = std::unique_ptr( new DocHitInfoIteratorSectionRestrict( - std::move(trimmed_delegate.iterator_), &document_store_, - &schema_store_, std::move(type_property_filters_), - std::move(type_property_masks_), current_time_ms_)); + std::move(trimmed_delegate.iterator_), std::move(data_))); return std::move(trimmed_delegate); } -int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const { - return delegate_->GetNumBlocksInspected(); -} - -int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const { - return delegate_->GetNumLeafAdvanceCalls(); -} - std::string DocHitInfoIteratorSectionRestrict::ToString() const { std::string output = ""; - for (auto it = type_property_filters_.cbegin(); - it != type_property_filters_.cend(); it++) { + for (auto it = data_->type_property_filters().cbegin(); + it != data_->type_property_filters().cend(); it++) { std::string paths = absl_ports::StrJoin(it->second, ","); output += (it->first) + ":" + (paths) + "; "; } diff --git 
a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h index 5d44ed7..387ff52 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h @@ -17,15 +17,18 @@ #include #include +#include #include -#include -#include +#include #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/iterator/section-restrict-data.h" +#include "icing/proto/search.pb.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" -#include "icing/store/document-filter-data.h" +#include "icing/store/document-id.h" #include "icing/store/document-store.h" namespace icing { @@ -38,36 +41,48 @@ namespace lib { // That class is meant to be applied to the root of a query tree and filter over // all results at the end. This class is more used in the limited scope of a // term or a small group of terms. -class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator { +class DocHitInfoIteratorSectionRestrict : public DocHitInfoLeafIterator { public: // Does not take any ownership, and all pointers must refer to valid objects // that outlive the one constructed. explicit DocHitInfoIteratorSectionRestrict( - std::unique_ptr delegate, + std::unique_ptr delegate, SectionRestrictData* data); + + // Methods that apply section restrictions to all DocHitInfoLeafIterator nodes + // inside the provided iterator tree, and return the root of the tree + // afterwards. These methods do not take any ownership for the raw pointer + // parameters, which must refer to valid objects that outlive the iterator + // returned. 
+ static std::unique_ptr ApplyRestrictions( + std::unique_ptr iterator, const DocumentStore* document_store, const SchemaStore* schema_store, std::set target_sections, int64_t current_time_ms); - - explicit DocHitInfoIteratorSectionRestrict( - std::unique_ptr delegate, + static std::unique_ptr ApplyRestrictions( + std::unique_ptr iterator, const DocumentStore* document_store, const SchemaStore* schema_store, - const SearchSpecProto& search_spec, - int64_t current_time_ms); + const SearchSpecProto& search_spec, int64_t current_time_ms); + static std::unique_ptr ApplyRestrictions( + std::unique_ptr iterator, SectionRestrictData* data); libtextclassifier3::Status Advance() override; libtextclassifier3::StatusOr TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override; - - int32_t GetNumLeafAdvanceCalls() const override; + CallStats GetCallStats() const override { return delegate_->GetCallStats(); } std::string ToString() const override; - // Note that the DocHitInfoIteratorSectionRestrict is the only iterator that - // should set filtering_section_mask, hence the received - // filtering_section_mask is ignored and the filtering_section_mask passed to - // the delegate will be set to hit_intersect_section_ids_mask_. This will - // allow to filter the matching sections in the delegate. + // Note that the DocHitInfoIteratorSectionRestrict can only be applied at + // DocHitInfoLeafIterator, which can be a term iterator or another + // DocHitInfoIteratorSectionRestrict. + // + // To filter the matching sections, filtering_section_mask should be set to + // doc_hit_info_.hit_section_ids_mask() held in the outermost + // DocHitInfoIteratorSectionRestrict, which is equal to the intersection of + // all hit_section_ids_mask in the DocHitInfoIteratorSectionRestrict chain, + // since for any two section restrict iterators chained together, the outer + // one's hit_section_ids_mask is always a subset of the inner one's + // hit_section_ids_mask. 
void PopulateMatchedTermsStats( std::vector* matched_terms_stats, SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { @@ -77,55 +92,14 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator { } delegate_->PopulateMatchedTermsStats( matched_terms_stats, - /*filtering_section_mask=*/hit_intersect_section_ids_mask_); + /*filtering_section_mask=*/filtering_section_mask & + doc_hit_info_.hit_section_ids_mask()); } private: - explicit DocHitInfoIteratorSectionRestrict( - std::unique_ptr delegate, - const DocumentStore* document_store, const SchemaStore* schema_store, - std::unordered_map> - type_property_filters, - std::unordered_map type_property_masks, - int64_t current_time_ms); - // Calculates the section mask of allowed sections(determined by the property - // filters map) for the given schema type and caches the same for any future - // calls. - // - // Returns: - // - If type_property_filters_ has an entry for the given schema type or - // wildcard(*), return a bitwise or of section IDs in the schema type that - // that are also present in the relevant filter list. - // - Otherwise, return kSectionIdMaskAll. - SectionIdMask ComputeAndCacheSchemaTypeAllowedSectionsMask( - const std::string& schema_type); - // Generates a section mask for the given schema type and the target sections. - // - // Returns: - // - A bitwise or of section IDs in the schema_type that that are also - // present in the target_sections list. - // - If none of the sections in the schema_type are present in the - // target_sections list, return kSectionIdMaskNone. - // This is done by doing a bitwise or of the target section ids for the given - // schema type. - SectionIdMask GenerateSectionMask(const std::string& schema_type, - const std::set& - target_sections) const; - std::unique_ptr delegate_; - const DocumentStore& document_store_; - const SchemaStore& schema_store_; - int64_t current_time_ms_; - - // Map of property filters per schema type. 
Supports wildcard(*) for schema - // type that will apply to all schema types that are not specifically - // specified in the mapping otherwise. - std::unordered_map> - type_property_filters_; - // Mapping of schema type to the section mask of allowed sections for that - // schema type. This section mask is lazily calculated based on the specified - // property filters and cached for any future use. - std::unordered_map type_property_masks_; + // Does not own. + SectionRestrictData* data_; }; } // namespace lib diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 1500571..ee65fe1 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -15,6 +15,7 @@ #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h" #include +#include #include #include #include @@ -150,48 +151,50 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, auto original_iterator = std::make_unique(doc_hit_infos, "hi"); - original_iterator->set_hit_intersect_section_ids_mask( - original_section_id_mask); + original_iterator->set_hit_section_ids_mask(original_section_id_mask); // Filtering for the indexed section name (which has a section id of 0) should // get a result. 
- DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_0}, - fake_clock_.GetSystemTimeMilliseconds()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_0}, + fake_clock_.GetSystemTimeMilliseconds()); std::vector matched_terms_stats; - section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats); EXPECT_THAT(matched_terms_stats, IsEmpty()); - ICING_EXPECT_OK(section_restrict_iterator.Advance()); - EXPECT_THAT(section_restrict_iterator.doc_hit_info().document_id(), + ICING_EXPECT_OK(section_restrict_iterator->Advance()); + EXPECT_THAT(section_restrict_iterator->doc_hit_info().document_id(), Eq(document_id)); SectionIdMask expected_section_id_mask = 0b00000001; // hits in sections 0 - EXPECT_EQ(section_restrict_iterator.hit_intersect_section_ids_mask(), + EXPECT_EQ(section_restrict_iterator->doc_hit_info().hit_section_ids_mask(), expected_section_id_mask); - section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats); std::unordered_map expected_section_ids_tf_map = {{0, 1}}; EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo( "hi", expected_section_ids_tf_map))); - EXPECT_FALSE(section_restrict_iterator.Advance().ok()); + EXPECT_FALSE(section_restrict_iterator->Advance().ok()); } TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) { std::unique_ptr original_iterator_empty = std::make_unique(); - DocHitInfoIteratorSectionRestrict filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), /*target_sections=*/std::set(), - 
fake_clock_.GetSystemTimeMilliseconds()); + std::unique_ptr filtered_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator_empty), document_store_.get(), + schema_store_.get(), /*target_sections=*/std::set(), + fake_clock_.GetSystemTimeMilliseconds()); - EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); + EXPECT_THAT(GetDocumentIds(filtered_iterator.get()), IsEmpty()); std::vector matched_terms_stats; - filtered_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + filtered_iterator->PopulateMatchedTermsStats(&matched_terms_stats); EXPECT_THAT(matched_terms_stats, IsEmpty()); } @@ -210,12 +213,14 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) { std::make_unique(doc_hit_infos); // Filtering for the indexed section name should get a result - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_0}, - fake_clock_.GetSystemTimeMilliseconds()); - - EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_0}, + fake_clock_.GetSystemTimeMilliseconds()); + + EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), ElementsAre(document_id)); } @@ -236,18 +241,18 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::make_unique(doc_hit_infos); // Filter for both target_sections - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_0, indexed_section_1}, - fake_clock_.GetSystemTimeMilliseconds()); - - ICING_ASSERT_OK(section_restrict_iterator.Advance()); + std::unique_ptr section_restrict_iterator = +
DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_0, indexed_section_1}, + fake_clock_.GetSystemTimeMilliseconds()); + + ICING_ASSERT_OK(section_restrict_iterator->Advance()); std::vector expected_section_ids = {kIndexedSectionId0, kIndexedSectionId1}; - EXPECT_THAT(section_restrict_iterator.doc_hit_info(), + EXPECT_THAT(section_restrict_iterator->doc_hit_info(), EqualsDocHitInfo(document_id, expected_section_ids)); - EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(), - Eq(section_id_mask)); } TEST_F(DocHitInfoIteratorSectionRestrictTest, @@ -267,17 +272,17 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::make_unique(doc_hit_infos); // Filter for both target_sections - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_1}, - fake_clock_.GetSystemTimeMilliseconds()); - - ICING_ASSERT_OK(section_restrict_iterator.Advance()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_1}, + fake_clock_.GetSystemTimeMilliseconds()); + + ICING_ASSERT_OK(section_restrict_iterator->Advance()); std::vector expected_section_ids = {kIndexedSectionId1}; - EXPECT_THAT(section_restrict_iterator.doc_hit_info(), + EXPECT_THAT(section_restrict_iterator->doc_hit_info(), EqualsDocHitInfo(document_id, expected_section_ids)); - EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(), - Eq(1U << kIndexedSectionId1)); } TEST_F(DocHitInfoIteratorSectionRestrictTest, @@ -296,17 +301,17 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::make_unique(doc_hit_infos); // Filter for both target_sections - DocHitInfoIteratorSectionRestrict 
section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_0, indexed_section_1}, - fake_clock_.GetSystemTimeMilliseconds()); - - ICING_ASSERT_OK(section_restrict_iterator.Advance()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_0, indexed_section_1}, + fake_clock_.GetSystemTimeMilliseconds()); + + ICING_ASSERT_OK(section_restrict_iterator->Advance()); std::vector expected_section_ids = {kIndexedSectionId1}; - EXPECT_THAT(section_restrict_iterator.doc_hit_info(), + EXPECT_THAT(section_restrict_iterator->doc_hit_info(), EqualsDocHitInfo(document_id, expected_section_ids)); - EXPECT_THAT(section_restrict_iterator.hit_intersect_section_ids_mask(), - Eq(1U << kIndexedSectionId1)); } TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) { @@ -317,13 +322,15 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) { std::make_unique(doc_hit_infos); // Filtering for the indexed section name should get a result - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds()); - EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty()); + EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty()); std::vector matched_terms_stats; - section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); +
section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats); EXPECT_THAT(matched_terms_stats, IsEmpty()); } @@ -343,14 +350,16 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::make_unique(doc_hit_infos); // Filtering for the indexed section name should get a result - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{"some_section_name"}, - fake_clock_.GetSystemTimeMilliseconds()); - - EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{"some_section_name"}, + fake_clock_.GetSystemTimeMilliseconds()); + + EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty()); std::vector matched_terms_stats; - section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats); EXPECT_THAT(matched_terms_stats, IsEmpty()); } @@ -368,14 +377,16 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::unique_ptr original_iterator = std::make_unique(doc_hit_infos); - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_0}, - fake_clock_.GetSystemTimeMilliseconds()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_0}, + fake_clock_.GetSystemTimeMilliseconds()); - EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty()); + EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty()); std::vector matched_terms_stats; -
section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats); EXPECT_THAT(matched_terms_stats, IsEmpty()); } @@ -396,37 +407,37 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::unique_ptr original_iterator = std::make_unique(doc_hit_infos); - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{indexed_section_0}, - fake_clock_.GetSystemTimeMilliseconds()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{indexed_section_0}, + fake_clock_.GetSystemTimeMilliseconds()); - EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty()); + EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty()); std::vector matched_terms_stats; - section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats); EXPECT_THAT(matched_terms_stats, IsEmpty()); } -TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) { +TEST_F(DocHitInfoIteratorSectionRestrictTest, GetCallStats) { + DocHitInfoIterator::CallStats original_call_stats( + /*num_leaf_advance_calls_lite_index_in=*/2, + /*num_leaf_advance_calls_main_index_in=*/5, + /*num_leaf_advance_calls_integer_index_in=*/3, + /*num_leaf_advance_calls_no_index_in=*/1, + /*num_blocks_inspected_in=*/4); // arbitrary value auto original_iterator = std::make_unique(); - original_iterator->SetNumBlocksInspected(5); + original_iterator->SetCallStats(original_call_stats); - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds()); + 
std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), + /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds()); - EXPECT_THAT(section_restrict_iterator.GetNumBlocksInspected(), Eq(5)); -} - -TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumLeafAdvanceCalls) { - auto original_iterator = std::make_unique(); - original_iterator->SetNumLeafAdvanceCalls(6); - - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds()); - - EXPECT_THAT(section_restrict_iterator.GetNumLeafAdvanceCalls(), Eq(6)); + EXPECT_THAT(section_restrict_iterator->GetCallStats(), + Eq(original_call_stats)); } TEST_F(DocHitInfoIteratorSectionRestrictTest, @@ -444,12 +455,10 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, // Anything that's not 0, which is the indexed property SectionId not_matching_section_id = 2; - // Build an interator tree like: - // Restrict - // | + // Build an iterator tree like: // AND // / \ - // [1, 1],[2, 2] [3, 2] + // [1, 1],[2, 2] [3, 2] std::vector left_infos = { DocHitInfo(document_id1, 1U << matching_section_id), DocHitInfo(document_id2, 1U << not_matching_section_id)}; @@ -460,14 +469,21 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, std::make_unique(left_infos); std::unique_ptr right_iterator = std::make_unique(right_infos, "term", 10); - std::unique_ptr original_iterator = std::make_unique(std::move(left_iterator), std::move(right_iterator)); - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - {indexed_section_0}, fake_clock_.GetSystemTimeMilliseconds()); + // After applying section restriction: + // AND + // / \ + // Restrict Restrict + // | | + // [1, 1],[2, 2] [3, 2] + std::unique_ptr 
section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), {indexed_section_0}, + fake_clock_.GetSystemTimeMilliseconds()); // The trimmed tree. // Restrict @@ -475,12 +491,12 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, // [1, 1],[2, 2] ICING_ASSERT_OK_AND_ASSIGN( DocHitInfoIterator::TrimmedNode node, - std::move(section_restrict_iterator).TrimRightMostNode()); + std::move(*section_restrict_iterator).TrimRightMostNode()); EXPECT_THAT(GetDocumentIds(node.iterator_.get()), ElementsAre(document_id1)); EXPECT_THAT(node.term_, Eq("term")); EXPECT_THAT(node.term_start_index_, Eq(10)); - EXPECT_THAT(node.target_section_, Eq("")); + EXPECT_THAT(node.target_section_, Eq(indexed_section_0)); } TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) { @@ -505,14 +521,16 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) { std::unique_ptr original_iterator = std::make_unique(doc_infos, "term", 10); - DocHitInfoIteratorSectionRestrict section_restrict_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - {indexed_section_0}, fake_clock_.GetSystemTimeMilliseconds()); + std::unique_ptr section_restrict_iterator = + DocHitInfoIteratorSectionRestrict::ApplyRestrictions( + std::move(original_iterator), document_store_.get(), + schema_store_.get(), {indexed_section_0}, + fake_clock_.GetSystemTimeMilliseconds()); // The trimmed tree has null iterator but has target section. 
ICING_ASSERT_OK_AND_ASSIGN( DocHitInfoIterator::TrimmedNode node, - std::move(section_restrict_iterator).TrimRightMostNode()); + std::move(*section_restrict_iterator).TrimRightMostNode()); EXPECT_THAT(node.iterator_, testing::IsNull()); EXPECT_THAT(node.term_, Eq("term")); diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h index a77b91c..c75fb33 100644 --- a/icing/index/iterator/doc-hit-info-iterator-test-util.h +++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h @@ -71,7 +71,7 @@ class DocHitInfoTermFrequencyPair { // will then proceed to return the doc_hit_infos in order as Advance's are // called. After all doc_hit_infos are returned, Advance will return a NotFound // error (also like normal DocHitInfoIterators). -class DocHitInfoIteratorDummy : public DocHitInfoIterator { +class DocHitInfoIteratorDummy : public DocHitInfoLeafIterator { public: DocHitInfoIteratorDummy() = default; explicit DocHitInfoIteratorDummy( @@ -140,25 +140,14 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator { matched_terms_stats->push_back(term_stats); } - void set_hit_intersect_section_ids_mask( - SectionIdMask hit_intersect_section_ids_mask) { - hit_intersect_section_ids_mask_ = hit_intersect_section_ids_mask; + void set_hit_section_ids_mask(SectionIdMask hit_section_ids_mask) { + doc_hit_info_.set_hit_section_ids_mask(hit_section_ids_mask); } - int32_t GetNumBlocksInspected() const override { - return num_blocks_inspected_; - } - - void SetNumBlocksInspected(int32_t num_blocks_inspected) { - num_blocks_inspected_ = num_blocks_inspected; - } - - int32_t GetNumLeafAdvanceCalls() const override { - return num_leaf_advance_calls_; - } + CallStats GetCallStats() const override { return call_stats_; } - void SetNumLeafAdvanceCalls(int32_t num_leaf_advance_calls) { - num_leaf_advance_calls_ = num_leaf_advance_calls; + void SetCallStats(CallStats call_stats) { + call_stats_ = std::move(call_stats); } 
std::string ToString() const override { @@ -176,8 +165,7 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator { private: int32_t index_ = -1; - int32_t num_blocks_inspected_ = 0; - int32_t num_leaf_advance_calls_ = 0; + CallStats call_stats_; std::vector doc_hit_infos_; std::string term_; int term_start_index_; diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h index d8cd3ad..728f957 100644 --- a/icing/index/iterator/doc-hit-info-iterator.h +++ b/icing/index/iterator/doc-hit-info-iterator.h @@ -17,8 +17,12 @@ #include #include +#include +#include #include #include +#include +#include #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" @@ -52,8 +56,7 @@ struct TermMatchInfo { // Iterator over DocHitInfos (collapsed Hits) in REVERSE document_id order. // -// NOTE: You must call Advance() before calling hit_info() or -// hit_intersect_section_ids_mask(). +// NOTE: You must call Advance() before calling hit_info(). // // Example: // DocHitInfoIterator itr = GetIterator(...); @@ -62,6 +65,112 @@ struct TermMatchInfo { // } class DocHitInfoIterator { public: + using ChildrenMapper = std::function( + std::unique_ptr)>; + + // CallStats is a wrapper class of all stats to collect among all levels of + // the DocHitInfoIterator tree. Mostly the internal nodes will aggregate the + // number of all leaf nodes, while the leaf nodes will return the actual + // numbers. + struct CallStats { + // The number of times Advance() was called on the leaf node for term lite + // index. + // - Leaf nodes: + // - DocHitInfoIteratorTermLite should maintain and set it correctly. + // - Others should set it 0. + // - Internal nodes: should aggregate values from all children. + int32_t num_leaf_advance_calls_lite_index; + + // The number of times Advance() was called on the leaf node for term main + // index. 
+ // - Leaf nodes: + // - DocHitInfoIteratorTermMain should maintain and set it correctly. + // - Others should set it 0. + // - Internal nodes: should aggregate values from all children. + int32_t num_leaf_advance_calls_main_index; + + // The number of times Advance() was called on the leaf node for integer + // index. + // - Leaf nodes: + // - DocHitInfoIteratorNumeric should maintain and set it correctly. + // - Others should set it 0. + // - Internal nodes: should aggregate values from all children. + int32_t num_leaf_advance_calls_integer_index; + + // The number of times Advance() was called on the leaf node without reading + // any hits from index. Usually it is a special field for + // DocHitInfoIteratorAllDocumentId. + // - Leaf nodes: + // - DocHitInfoIteratorAllDocumentId should maintain and set it correctly. + // - Others should set it 0. + // - Internal nodes: should aggregate values from all children. + int32_t num_leaf_advance_calls_no_index; + + // The number of flash index blocks that have been read as a result of + // operations on this object. + // - Leaf nodes: should maintain and set it correctly for all child classes + // involving flash index block access. + // - Internal nodes: should aggregate values from all children. 
+ int32_t num_blocks_inspected; + + explicit CallStats() + : CallStats(/*num_leaf_advance_calls_lite_index_in=*/0, + /*num_leaf_advance_calls_main_index_in=*/0, + /*num_leaf_advance_calls_integer_index_in=*/0, + /*num_leaf_advance_calls_no_index_in=*/0, + /*num_blocks_inspected_in=*/0) {} + + explicit CallStats(int32_t num_leaf_advance_calls_lite_index_in, + int32_t num_leaf_advance_calls_main_index_in, + int32_t num_leaf_advance_calls_integer_index_in, + int32_t num_leaf_advance_calls_no_index_in, + int32_t num_blocks_inspected_in) + : num_leaf_advance_calls_lite_index( + num_leaf_advance_calls_lite_index_in), + num_leaf_advance_calls_main_index( + num_leaf_advance_calls_main_index_in), + num_leaf_advance_calls_integer_index( + num_leaf_advance_calls_integer_index_in), + num_leaf_advance_calls_no_index(num_leaf_advance_calls_no_index_in), + num_blocks_inspected(num_blocks_inspected_in) {} + + int32_t num_leaf_advance_calls() const { + return num_leaf_advance_calls_lite_index + + num_leaf_advance_calls_main_index + + num_leaf_advance_calls_integer_index + + num_leaf_advance_calls_no_index; + } + + bool operator==(const CallStats& other) const { + return num_leaf_advance_calls_lite_index == + other.num_leaf_advance_calls_lite_index && + num_leaf_advance_calls_main_index == + other.num_leaf_advance_calls_main_index && + num_leaf_advance_calls_integer_index == + other.num_leaf_advance_calls_integer_index && + num_leaf_advance_calls_no_index == + other.num_leaf_advance_calls_no_index && + num_blocks_inspected == other.num_blocks_inspected; + } + + CallStats operator+(const CallStats& other) const { + return CallStats(num_leaf_advance_calls_lite_index + + other.num_leaf_advance_calls_lite_index, + num_leaf_advance_calls_main_index + + other.num_leaf_advance_calls_main_index, + num_leaf_advance_calls_integer_index + + other.num_leaf_advance_calls_integer_index, + num_leaf_advance_calls_no_index + + other.num_leaf_advance_calls_no_index, + num_blocks_inspected + 
other.num_blocks_inspected); + } + + CallStats& operator+=(const CallStats& other) { + *this = *this + other; + return *this; + } + }; + struct TrimmedNode { // the query results which we should only search for suggestion in these // documents. @@ -100,6 +209,11 @@ class DocHitInfoIterator { // INVALID_ARGUMENT if the right-most node is not suppose to be trimmed. virtual libtextclassifier3::StatusOr TrimRightMostNode() && = 0; + // Map all direct children of this iterator according to the passed mapper. + virtual void MapChildren(const ChildrenMapper& mapper) = 0; + + virtual bool is_leaf() { return false; } + virtual ~DocHitInfoIterator() = default; // Returns: @@ -114,20 +228,8 @@ class DocHitInfoIterator { // construction or if Advance returned an error. const DocHitInfo& doc_hit_info() const { return doc_hit_info_; } - // SectionIdMask representing which sections (if any) have matched *ALL* query - // terms for the current document_id. - SectionIdMask hit_intersect_section_ids_mask() const { - return hit_intersect_section_ids_mask_; - } - - // Gets the number of flash index blocks that have been read as a - // result of operations on this object. - virtual int32_t GetNumBlocksInspected() const = 0; - - // HitIterators may be constructed into trees. Internal nodes will return the - // sum of the number of Advance() calls to all leaf nodes. Leaf nodes will - // return the number of times Advance() was called on it. - virtual int32_t GetNumLeafAdvanceCalls() const = 0; + // Returns CallStats of the DocHitInfoIterator tree. + virtual CallStats GetCallStats() const = 0; // A string representing the iterator. virtual std::string ToString() const = 0; @@ -145,7 +247,6 @@ class DocHitInfoIterator { protected: DocHitInfo doc_hit_info_; - SectionIdMask hit_intersect_section_ids_mask_ = kSectionIdMaskNone; // Helper function to advance the given iterator to at most the given // document_id. 
@@ -160,11 +261,20 @@ class DocHitInfoIterator { // Didn't find anything for the other iterator, reset to invalid values and // return. doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } -}; // namespace DocHitInfoIterator +}; + +// A leaf node is a term node or a chain of section restriction node applied on +// a term node. +class DocHitInfoLeafIterator : public DocHitInfoIterator { + public: + bool is_leaf() override { return true; } + + // Calling MapChildren on leaf node does not make sense, and will do nothing. + void MapChildren(const ChildrenMapper& mapper) override {} +}; } // namespace lib } // namespace icing diff --git a/icing/index/iterator/section-restrict-data.cc b/icing/index/iterator/section-restrict-data.cc new file mode 100644 index 0000000..085437d --- /dev/null +++ b/icing/index/iterator/section-restrict-data.cc @@ -0,0 +1,82 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/index/iterator/section-restrict-data.h" + +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" + +namespace icing { +namespace lib { + +SectionIdMask SectionRestrictData::GenerateSectionMask( + const std::string& schema_type, + const std::set& target_sections) const { + SectionIdMask section_mask = kSectionIdMaskNone; + auto section_metadata_list = schema_store_.GetSectionMetadata(schema_type); + if (!section_metadata_list.ok()) { + // The current schema doesn't have section metadata. + return kSectionIdMaskNone; + } + for (const SectionMetadata& section_metadata : + *section_metadata_list.ValueOrDie()) { + if (target_sections.find(section_metadata.path) != target_sections.end()) { + section_mask |= UINT64_C(1) << section_metadata.id; + } + } + return section_mask; +} + +SectionIdMask SectionRestrictData::ComputeAllowedSectionsMask( + const std::string& schema_type) { + if (const auto type_property_mask_itr = + type_property_masks_.find(schema_type); + type_property_mask_itr != type_property_masks_.end()) { + return type_property_mask_itr->second; + } + + // Section id mask of schema_type is never calculated before, so + // calculate it here and put it into type_property_masks_. + // - If type property filters of schema_type or wildcard (*) are + // specified, then create a mask according to the filters. + // - Otherwise, create a mask to match all properties. 
+ SectionIdMask new_section_id_mask = kSectionIdMaskAll; + if (const auto itr = type_property_filters_.find(schema_type); + itr != type_property_filters_.end()) { + // Property filters defined for given schema type + new_section_id_mask = GenerateSectionMask(schema_type, itr->second); + } else if (const auto wildcard_itr = type_property_filters_.find( + std::string(SchemaStore::kSchemaTypeWildcard)); + wildcard_itr != type_property_filters_.end()) { + // Property filters defined for wildcard entry + new_section_id_mask = + GenerateSectionMask(schema_type, wildcard_itr->second); + } else { + // Do not cache the section mask if no property filters apply to this schema + // type to avoid taking up unnecessary space. + return kSectionIdMaskAll; + } + + type_property_masks_[schema_type] = new_section_id_mask; + return new_section_id_mask; +} + +} // namespace lib +} // namespace icing diff --git a/icing/index/iterator/section-restrict-data.h b/icing/index/iterator/section-restrict-data.h new file mode 100644 index 0000000..26ca597 --- /dev/null +++ b/icing/index/iterator/section-restrict-data.h @@ -0,0 +1,98 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_ +#define ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_ + +#include +#include +#include +#include +#include + +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/store/document-store.h" + +namespace icing { +namespace lib { + +class SectionRestrictData { + public: + // Does not take any ownership, and all pointers must refer to valid objects + // that outlive the one constructed. + SectionRestrictData(const DocumentStore* document_store, + const SchemaStore* schema_store, int64_t current_time_ms, + std::unordered_map> + type_property_filters) + : document_store_(*document_store), + schema_store_(*schema_store), + current_time_ms_(current_time_ms), + type_property_filters_(std::move(type_property_filters)) {} + + // Calculates the section mask of allowed sections(determined by the + // property filters map) for the given schema type and caches the same for any + // future calls. + // + // Returns: + // - If type_property_filters_ has an entry for the given schema type or + // wildcard(*), return a bitwise or of section IDs in the schema type + // that that are also present in the relevant filter list. + // - Otherwise, return kSectionIdMaskAll. + SectionIdMask ComputeAllowedSectionsMask(const std::string& schema_type); + + const DocumentStore& document_store() const { return document_store_; } + + const SchemaStore& schema_store() const { return schema_store_; } + + int64_t current_time_ms() const { return current_time_ms_; } + + const std::unordered_map>& + type_property_filters() const { + return type_property_filters_; + } + + private: + const DocumentStore& document_store_; + const SchemaStore& schema_store_; + int64_t current_time_ms_; + + // Map of property filters per schema type. Supports wildcard(*) for schema + // type that will apply to all schema types that are not specifically + // specified in the mapping otherwise. 
+ std::unordered_map> type_property_filters_; + // Mapping of schema type to the section mask of allowed sections for that + // schema type. This section mask is lazily calculated based on the + // specified property filters and cached for any future use. + std::unordered_map type_property_masks_; + + // Generates a section mask for the given schema type and the target + // sections. + // + // Returns: + // - A bitwise or of section IDs in the schema_type that that are also + // present in the target_sections list. + // - If none of the sections in the schema_type are present in the + // target_sections list, return kSectionIdMaskNone. + // This is done by doing a bitwise or of the target section ids for the + // given schema type. + SectionIdMask GenerateSectionMask( + const std::string& schema_type, + const std::set& target_sections) const; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_ -- cgit v1.2.3