diff options
author | Alexander Dorokhine <adorokhine@google.com> | 2021-08-13 11:07:18 -0700 |
---|---|---|
committer | Alexander Dorokhine <adorokhine@google.com> | 2021-08-13 11:07:18 -0700 |
commit | 71eeeb436ec794daef5187d12ca9aba2c5773d7d (patch) | |
tree | 5129f3a74eb616eb2856c6db3392011c1ce384f8 /icing/index/iterator | |
parent | 40c7203fab1d491f62f223fad10eddc180605729 (diff) | |
download | icing-71eeeb436ec794daef5187d12ca9aba2c5773d7d.tar.gz |
Merge androidx-platform-dev/external/icing upstream-master into upstream-master
Change-Id: Id39bea14b3a3d378d722dc552e4f3bd4249f3f94
Diffstat (limited to 'icing/index/iterator')
9 files changed, 288 insertions, 281 deletions
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc index 66f87bd..39aa969 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and.cc @@ -162,6 +162,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { DocumentId unused; ICING_ASSIGN_OR_RETURN( unused, AdvanceTo(iterator.get(), potential_document_id)); + (void)unused; // Silence unused warning. } if (iterator->doc_hit_info().document_id() == potential_document_id) { diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc index c6cb86d..933f9b5 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc @@ -31,7 +31,6 @@ #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" -#include "icing/util/clock.h" namespace icing { namespace lib { @@ -39,12 +38,11 @@ namespace lib { DocHitInfoIteratorFilter::DocHitInfoIteratorFilter( std::unique_ptr<DocHitInfoIterator> delegate, const DocumentStore* document_store, const SchemaStore* schema_store, - const Clock* clock, const Options& options) + const Options& options) : delegate_(std::move(delegate)), document_store_(*document_store), schema_store_(*schema_store), - options_(options), - current_time_milliseconds_(clock->GetSystemTimeMilliseconds()) { + options_(options) { // Precompute all the NamespaceIds for (std::string_view name_space : options_.namespaces) { auto namespace_id_or = document_store_.GetNamespaceId(name_space); @@ -67,61 +65,50 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter( } libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() { - if (!delegate_->Advance().ok()) { - // Didn't find anything on the delegate iterator. - doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; - return absl_ports::ResourceExhaustedError( - "No more DocHitInfos in iterator"); - } - - if (current_time_milliseconds_ < 0) { - // This shouldn't happen, but we add a sanity check here for any unknown - // errors. - return absl_ports::InternalError( - "Couldn't get current time. Try again in a bit"); - } - - if (!document_store_.DoesDocumentExist( - delegate_->doc_hit_info().document_id())) { - // Document doesn't exist, keep searching - return Advance(); - } + while (delegate_->Advance().ok()) { + if (!document_store_.DoesDocumentExist( + delegate_->doc_hit_info().document_id())) { + // Document doesn't exist, keep searching. This handles deletions and + // expired documents. + continue; + } - // Try to get the DocumentFilterData - auto document_filter_data_or = document_store_.GetDocumentFilterData( - delegate_->doc_hit_info().document_id()); - if (!document_filter_data_or.ok()) { - // Didn't find the DocumentFilterData in the filter cache. This could be - // because the DocumentId isn't valid or the filter cache is in some invalid - // state. This is bad, but not the query's responsibility to fix, so just - // skip this result for now. - return Advance(); - } - // We should be guaranteed that this exists now. - DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie(); + // Try to get the DocumentFilterData + auto document_filter_data_or = document_store_.GetDocumentFilterData( + delegate_->doc_hit_info().document_id()); + if (!document_filter_data_or.ok()) { + // Didn't find the DocumentFilterData in the filter cache. This could be + // because the DocumentId isn't valid or the filter cache is in some + // invalid state. This is bad, but not the query's responsibility to fix, + // so just skip this result for now. + continue; + } + // We should be guaranteed that this exists now. + DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie(); - if (!options_.namespaces.empty() && - target_namespace_ids_.count(data.namespace_id()) == 0) { - // Doesn't match one of the specified namespaces. Keep searching - return Advance(); - } + if (!options_.namespaces.empty() && + target_namespace_ids_.count(data.namespace_id()) == 0) { + // Doesn't match one of the specified namespaces. Keep searching + continue; + } - if (!options_.schema_types.empty() && - target_schema_type_ids_.count(data.schema_type_id()) == 0) { - // Doesn't match one of the specified schema types. Keep searching - return Advance(); - } + if (!options_.schema_types.empty() && + target_schema_type_ids_.count(data.schema_type_id()) == 0) { + // Doesn't match one of the specified schema types. Keep searching + continue; + } - if (current_time_milliseconds_ >= data.expiration_timestamp_ms()) { - // Current time has exceeded the document's expiration time - return Advance(); + // Satisfied all our specified filters + doc_hit_info_ = delegate_->doc_hit_info(); + hit_intersect_section_ids_mask_ = + delegate_->hit_intersect_section_ids_mask(); + return libtextclassifier3::Status::OK; } - // Satisfied all our specified filters - doc_hit_info_ = delegate_->doc_hit_info(); - hit_intersect_section_ids_mask_ = delegate_->hit_intersect_section_ids_mask(); - return libtextclassifier3::Status::OK; + // Didn't find anything on the delegate iterator. + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + hit_intersect_section_ids_mask_ = kSectionIdMaskNone; + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h index 9cee74c..5051607 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.h +++ b/icing/index/iterator/doc-hit-info-iterator-filter.h @@ -27,7 +27,6 @@ #include "icing/schema/schema-store.h" #include "icing/store/document-store.h" #include "icing/store/namespace-id.h" -#include "icing/util/clock.h" namespace icing { namespace lib { @@ -57,7 +56,7 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator { explicit DocHitInfoIteratorFilter( std::unique_ptr<DocHitInfoIterator> delegate, const DocumentStore* document_store, const SchemaStore* schema_store, - const Clock* clock, const Options& options); + const Options& options); libtextclassifier3::Status Advance() override; @@ -81,7 +80,6 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator { const Options options_; std::unordered_set<NamespaceId> target_namespace_ids_; std::unordered_set<SchemaTypeId> target_schema_type_ids_; - const int64_t current_time_milliseconds_; }; } // namespace lib diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc index e0a8cd0..f80d1ea 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc @@ -28,6 +28,7 @@ #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/proto/document.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -59,10 +60,10 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test { test_document3_ = DocumentBuilder().SetKey("icing", "email/3").SetSchema("email").Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -100,9 +101,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, EmptyOriginalIterator) { std::unique_ptr<DocHitInfoIterator> original_iterator_empty = std::make_unique<DocHitInfoIteratorDummy>(); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -124,9 +125,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id3)); @@ -150,9 +151,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NonExistingDocumentsAreFiltered) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id2, document_id3)); @@ -163,9 +164,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NegativeDocumentIdIsIgnored) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(filtered_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -177,9 +178,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, InvalidDocumentIdIsIgnored) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(filtered_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -194,9 +195,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, GreaterThanMaxDocumentIdIsIgnored) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(filtered_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -226,10 +227,10 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test { .SetSchema("email") .Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("email"); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -270,9 +271,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, EmptyOriginalIterator) { std::make_unique<DocHitInfoIteratorDummy>(); options_.namespaces = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -288,9 +289,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -305,9 +306,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, NoNamespacesReturnsAll) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -329,9 +330,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{namespace1_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id2)); @@ -355,9 +356,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1, document_id2, document_id4)); @@ -379,14 +380,12 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test { document4_schema1_ = DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type(schema1_); - type_config = schema.add_types(); - type_config->set_schema_type(schema2_); - type_config = schema.add_types(); - type_config->set_schema_type(schema3_); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(schema1_)) + .AddType(SchemaTypeConfigBuilder().SetType(schema2_)) + .AddType(SchemaTypeConfigBuilder().SetType(schema3_)) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -427,9 +426,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, EmptyOriginalIterator) { std::make_unique<DocHitInfoIteratorDummy>(); options_.schema_types = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -446,9 +445,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, options_.schema_types = std::vector<std::string_view>{"nonexistent_schema_type"}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } @@ -463,9 +462,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, NoSchemaTypesReturnsAll) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.schema_types = std::vector<std::string_view>{}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -484,9 +483,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.schema_types = std::vector<std::string_view>{schema1_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -507,9 +506,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); options_.schema_types = std::vector<std::string_view>{schema2_, schema3_}; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id2, document_id3)); @@ -523,10 +522,10 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { void SetUp() override { filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type(email_schema_); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(email_schema_)) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -557,6 +556,16 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { }; TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) { + // Arbitrary value + fake_clock_.SetSystemTimeMilliseconds(100); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") @@ -565,23 +574,30 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) { .SetTtlMs(0) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Arbitrary value - fake_clock_.SetSystemTimeMilliseconds(100); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) { + // Arbitrary value, but must be less than document's creation_timestamp + ttl + fake_clock_.SetSystemTimeMilliseconds(50); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") @@ -590,92 +606,84 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) { .SetTtlMs(100) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Arbitrary value, but must be less than document's creation_timestamp + ttl - fake_clock_.SetSystemTimeMilliseconds(50); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) { + // Current time is exactly the document's creation_timestamp + ttl + fake_clock_.SetSystemTimeMilliseconds(150); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") .SetSchema(email_schema_) - .SetCreationTimestampMs(0) + .SetCreationTimestampMs(50) .SetTtlMs(100) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Current time is exactly the document's creation_timestamp + ttl - fake_clock_.SetSystemTimeMilliseconds(100); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) { + // Arbitrary value, but must be greater than the document's + // creation_timestamp + ttl + fake_clock_.SetSystemTimeMilliseconds(151); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + // Insert a document DocumentProto document = DocumentBuilder() .SetKey("namespace", "1") .SetSchema(email_schema_) - .SetCreationTimestampMs(0) + .SetCreationTimestampMs(50) .SetTtlMs(100) .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(document)); + document_store->Put(document)); std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - // Arbitrary value, but must be greater than the document's - // creation_timestamp + ttl - fake_clock_.SetSystemTimeMilliseconds(101); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options_); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); } -TEST_F(DocHitInfoIteratorExpirationFilterTest, - InvalidTimeFiltersReturnsInternalError) { - // Put something in the original iterator so we don't get a ResourceExhausted - // error - std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)}; - std::unique_ptr<DocHitInfoIterator> original_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - - // -1 is an invalid timestamp - fake_clock_.SetSystemTimeMilliseconds(-1); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options_); - - EXPECT_THAT(filtered_iterator.Advance(), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); -} - class DocHitInfoIteratorFilterTest : public ::testing::Test { protected: DocHitInfoIteratorFilterTest() : test_dir_(GetTestTempDir() + "/icing") {} @@ -709,16 +717,15 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test { document5_namespace1_schema1_ = DocumentBuilder() .SetKey(namespace1_, "5") .SetSchema(schema1_) - .SetCreationTimestampMs(0) + .SetCreationTimestampMs(1) .SetTtlMs(100) .Build(); - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type(schema1_); - type_config = schema.add_types(); - type_config->set_schema_type(schema2_); - + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType(schema1_)) + .AddType(SchemaTypeConfigBuilder().SetType(schema2_)) + .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); @@ -756,26 +763,36 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test { }; TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) { + // Filters out document5 since it's expired + fake_clock_.SetSystemTimeMilliseconds(199); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id1, - document_store_->Put(document1_namespace1_schema1_)); + document_store->Put(document1_namespace1_schema1_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id2, - document_store_->Put(document2_namespace1_schema1_)); + document_store->Put(document2_namespace1_schema1_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id3, - document_store_->Put(document3_namespace2_schema1_)); + document_store->Put(document3_namespace2_schema1_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id4, - document_store_->Put(document4_namespace1_schema2_)); + document_store->Put(document4_namespace1_schema2_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_id5, - document_store_->Put(document5_namespace1_schema1_)); + document_store->Put(document5_namespace1_schema1_)); // Deletes document2, causing it to be filtered out ICING_ASSERT_OK( - document_store_->Delete(document2_namespace1_schema1_.namespace_(), - document2_namespace1_schema1_.uri())); + document_store->Delete(document2_namespace1_schema1_.namespace_(), + document2_namespace1_schema1_.uri())); std::vector<DocHitInfo> doc_hit_infos = { DocHitInfo(document_id1), DocHitInfo(document_id2), @@ -793,13 +810,9 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) { // Filters out document4 by schema type options.schema_types = std::vector<std::string_view>{schema1_}; - // Filters out document5 since it's expired - FakeClock fake_clock; - fake_clock.SetSystemTimeMilliseconds(199); - - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store.get(), + schema_store_.get(), options); EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1)); } @@ -830,9 +843,9 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) { std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options); EXPECT_THAT(GetDocHitInfos(&filtered_iterator), ElementsAre(EqualsDocHitInfo(document_id1, section_ids1), @@ -845,9 +858,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) { original_iterator->SetNumBlocksInspected(5); DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options); EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5)); } @@ -857,9 +870,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) { original_iterator->SetNumLeafAdvanceCalls(6); DocHitInfoIteratorFilter::Options options; - DocHitInfoIteratorFilter filtered_iterator( - std::move(original_iterator), document_store_.get(), schema_store_.get(), - &fake_clock_, options); + DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options); EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6)); } diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc index e1ece5c..8fb3659 100644 --- a/icing/index/iterator/doc-hit-info-iterator-not.cc +++ b/icing/index/iterator/doc-hit-info-iterator-not.cc @@ -35,30 +35,29 @@ DocHitInfoIteratorNot::DocHitInfoIteratorNot( DocHitInfoIteratorAllDocumentId(document_id_limit)) {} libtextclassifier3::Status DocHitInfoIteratorNot::Advance() { - if (!all_document_id_iterator_.Advance().ok()) { - doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - return absl_ports::ResourceExhaustedError( - "No more DocHitInfos in iterator"); - } + while (all_document_id_iterator_.Advance().ok()) { + if (all_document_id_iterator_.doc_hit_info().document_id() < + to_be_excluded_->doc_hit_info().document_id()) { + // Since DocumentIds are returned from DocHitInfoIterators in decreasing + // order, we have passed the last NOT result if we're smaller than its + // DocumentId. Advance the NOT result if so. + to_be_excluded_->Advance().IgnoreError(); + } - if (all_document_id_iterator_.doc_hit_info().document_id() < - to_be_excluded_->doc_hit_info().document_id()) { - // Since DocumentIds are returned from DocHitInfoIterators in decreasing - // order, we have passed the last NOT result if we're smaller than its - // DocumentId. Advance the NOT result if so. - to_be_excluded_->Advance().IgnoreError(); - } + if (all_document_id_iterator_.doc_hit_info().document_id() == + to_be_excluded_->doc_hit_info().document_id()) { + // This is a NOT result, skip and Advance to the next result. + continue; + } - if (all_document_id_iterator_.doc_hit_info().document_id() == - to_be_excluded_->doc_hit_info().document_id()) { - // This is a NOT result, skip and Advance to the next result. - return Advance(); + // No errors, we've found a valid result + doc_hit_info_ = all_document_id_iterator_.doc_hit_info(); + return libtextclassifier3::Status::OK; } - // No errors, we've found a valid result - doc_hit_info_ = all_document_id_iterator_.doc_hit_info(); - - return libtextclassifier3::Status::OK; + // Didn't find a hit, return with error + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index e6ee8e3..034c8cb 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -45,56 +45,54 @@ DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( target_section_(target_section) {} libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { - if (!delegate_->Advance().ok()) { - // Didn't find anything on the delegate iterator. - doc_hit_info_ = DocHitInfo(kInvalidDocumentId); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; - return absl_ports::ResourceExhaustedError( - "No more DocHitInfos in iterator"); - } - - DocumentId document_id = delegate_->doc_hit_info().document_id(); - - SectionIdMask section_id_mask = - delegate_->doc_hit_info().hit_section_ids_mask(); - - auto data_or = document_store_.GetDocumentFilterData(document_id); - if (!data_or.ok()) { - // Ran into some error retrieving information on this hit, skip - return Advance(); - } + while (delegate_->Advance().ok()) { + DocumentId document_id = delegate_->doc_hit_info().document_id(); - // Guaranteed that the DocumentFilterData exists at this point - DocumentFilterData data = std::move(data_or).ValueOrDie(); - SchemaTypeId schema_type_id = data.schema_type_id(); + SectionIdMask section_id_mask = + delegate_->doc_hit_info().hit_section_ids_mask(); - // A hit can be in multiple sections at once, need to check that at least one - // of the confirmed section ids match the name of the target section - while (section_id_mask != 0) { - // There was a hit in this section id - SectionId section_id = __builtin_ctz(section_id_mask); - - auto section_metadata_or = - schema_store_.GetSectionMetadata(schema_type_id, section_id); - - if (section_metadata_or.ok()) { - const SectionMetadata* section_metadata = - section_metadata_or.ValueOrDie(); + auto data_or = document_store_.GetDocumentFilterData(document_id); + if (!data_or.ok()) { + // Ran into some error retrieving information on this hit, skip + continue; + } - if (section_metadata->path == target_section_) { - // The hit was in the target section name, return OK/found - doc_hit_info_ = delegate_->doc_hit_info(); - hit_intersect_section_ids_mask_ = 1u << section_id; - return libtextclassifier3::Status::OK; + // Guaranteed that the DocumentFilterData exists at this point + DocumentFilterData data = std::move(data_or).ValueOrDie(); + SchemaTypeId schema_type_id = data.schema_type_id(); + + // A hit can be in multiple sections at once, need to check that at least + // one of the confirmed section ids match the name of the target section + while (section_id_mask != 0) { + // There was a hit in this section id + SectionId section_id = __builtin_ctz(section_id_mask); + + auto section_metadata_or = + schema_store_.GetSectionMetadata(schema_type_id, section_id); + + if (section_metadata_or.ok()) { + const SectionMetadata* section_metadata = + section_metadata_or.ValueOrDie(); + + if (section_metadata->path == target_section_) { + // The hit was in the target section name, return OK/found + doc_hit_info_ = delegate_->doc_hit_info(); + hit_intersect_section_ids_mask_ = 1u << section_id; + return libtextclassifier3::Status::OK; + } } + + // Mark this section as checked + section_id_mask &= ~(1U << section_id); } - // Mark this section as checked - section_id_mask &= ~(1U << section_id); + // Didn't find a matching section name for this hit. Continue. } - // Didn't find a matching section name for this hit, go to the next hit - return Advance(); + // Didn't find anything on the delegate iterator. + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + hit_intersect_section_ids_mask_ = kSectionIdMaskNone; + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); } int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const { diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 21b3f8f..43a846b 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -29,6 +29,7 @@ #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" +#include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -47,6 +48,14 @@ using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::IsEmpty; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; + class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { protected: DocHitInfoIteratorSectionRestrictTest() @@ -57,18 +66,18 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { document_ = DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build(); - auto type_config = schema_.add_types(); - type_config->set_schema_type("email"); - - // Add an indexed property so we generate section metadata on it - auto property = type_config->add_properties(); - property->set_property_name(indexed_property_); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); + schema_ = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + // Add an indexed property so we generate section + // metadata on it + .AddProperty( + PropertyConfigBuilder() + .SetName(indexed_property_) + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); // First and only indexed property, so it gets the first id of 0 indexed_section_id_ = 0; diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h index afb298b..bf90202 100644 --- a/icing/index/iterator/doc-hit-info-iterator.h +++ b/icing/index/iterator/doc-hit-info-iterator.h @@ -66,6 +66,8 @@ class DocHitInfoIterator { // Returns: // OK if was able to advance to a new document_id. + // INVALID_ARGUMENT if there are less than 2 iterators for an AND/OR + // iterator // RESOUCE_EXHAUSTED if we've run out of document_ids to iterate over virtual libtextclassifier3::Status Advance() = 0; diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc index 90e4888..f975989 100644 --- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc +++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc @@ -14,15 +14,15 @@ #include <vector> +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator-and.h" #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" -#include "testing/base/public/benchmark.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" namespace icing { namespace lib { |