aboutsummaryrefslogtreecommitdiff
path: root/icing/index/iterator
diff options
context:
space:
mode:
authorAlexander Dorokhine <adorokhine@google.com>2021-08-13 11:07:18 -0700
committerAlexander Dorokhine <adorokhine@google.com>2021-08-13 11:07:18 -0700
commit71eeeb436ec794daef5187d12ca9aba2c5773d7d (patch)
tree5129f3a74eb616eb2856c6db3392011c1ce384f8 /icing/index/iterator
parent40c7203fab1d491f62f223fad10eddc180605729 (diff)
downloadicing-71eeeb436ec794daef5187d12ca9aba2c5773d7d.tar.gz
Merge androidx-platform-dev/external/icing upstream-master into upstream-master
Change-Id: Id39bea14b3a3d378d722dc552e4f3bd4249f3f94
Diffstat (limited to 'icing/index/iterator')
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and.cc1
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.cc93
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.h4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter_test.cc309
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-not.cc39
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc82
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc33
-rw-r--r--icing/index/iterator/doc-hit-info-iterator.h2
-rw-r--r--icing/index/iterator/doc-hit-info-iterator_benchmark.cc6
9 files changed, 288 insertions, 281 deletions
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index 66f87bd..39aa969 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -162,6 +162,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
DocumentId unused;
ICING_ASSIGN_OR_RETURN(
unused, AdvanceTo(iterator.get(), potential_document_id));
+ (void)unused; // Silence unused warning.
}
if (iterator->doc_hit_info().document_id() == potential_document_id) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index c6cb86d..933f9b5 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -31,7 +31,6 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -39,12 +38,11 @@ namespace lib {
DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Clock* clock, const Options& options)
+ const Options& options)
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- options_(options),
- current_time_milliseconds_(clock->GetSystemTimeMilliseconds()) {
+ options_(options) {
// Precompute all the NamespaceIds
for (std::string_view name_space : options_.namespaces) {
auto namespace_id_or = document_store_.GetNamespaceId(name_space);
@@ -67,61 +65,50 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
}
libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
- if (!delegate_->Advance().ok()) {
- // Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
-
- if (current_time_milliseconds_ < 0) {
- // This shouldn't happen, but we add a sanity check here for any unknown
- // errors.
- return absl_ports::InternalError(
- "Couldn't get current time. Try again in a bit");
- }
-
- if (!document_store_.DoesDocumentExist(
- delegate_->doc_hit_info().document_id())) {
- // Document doesn't exist, keep searching
- return Advance();
- }
+ while (delegate_->Advance().ok()) {
+ if (!document_store_.DoesDocumentExist(
+ delegate_->doc_hit_info().document_id())) {
+ // Document doesn't exist, keep searching. This handles deletions and
+ // expired documents.
+ continue;
+ }
- // Try to get the DocumentFilterData
- auto document_filter_data_or = document_store_.GetDocumentFilterData(
- delegate_->doc_hit_info().document_id());
- if (!document_filter_data_or.ok()) {
- // Didn't find the DocumentFilterData in the filter cache. This could be
- // because the DocumentId isn't valid or the filter cache is in some invalid
- // state. This is bad, but not the query's responsibility to fix, so just
- // skip this result for now.
- return Advance();
- }
- // We should be guaranteed that this exists now.
- DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
+ // Try to get the DocumentFilterData
+ auto document_filter_data_or = document_store_.GetDocumentFilterData(
+ delegate_->doc_hit_info().document_id());
+ if (!document_filter_data_or.ok()) {
+ // Didn't find the DocumentFilterData in the filter cache. This could be
+ // because the DocumentId isn't valid or the filter cache is in some
+ // invalid state. This is bad, but not the query's responsibility to fix,
+ // so just skip this result for now.
+ continue;
+ }
+ // We should be guaranteed that this exists now.
+ DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
- if (!options_.namespaces.empty() &&
- target_namespace_ids_.count(data.namespace_id()) == 0) {
- // Doesn't match one of the specified namespaces. Keep searching
- return Advance();
- }
+ if (!options_.namespaces.empty() &&
+ target_namespace_ids_.count(data.namespace_id()) == 0) {
+ // Doesn't match one of the specified namespaces. Keep searching
+ continue;
+ }
- if (!options_.schema_types.empty() &&
- target_schema_type_ids_.count(data.schema_type_id()) == 0) {
- // Doesn't match one of the specified schema types. Keep searching
- return Advance();
- }
+ if (!options_.schema_types.empty() &&
+ target_schema_type_ids_.count(data.schema_type_id()) == 0) {
+ // Doesn't match one of the specified schema types. Keep searching
+ continue;
+ }
- if (current_time_milliseconds_ >= data.expiration_timestamp_ms()) {
- // Current time has exceeded the document's expiration time
- return Advance();
+ // Satisfied all our specified filters
+ doc_hit_info_ = delegate_->doc_hit_info();
+ hit_intersect_section_ids_mask_ =
+ delegate_->hit_intersect_section_ids_mask();
+ return libtextclassifier3::Status::OK;
}
- // Satisfied all our specified filters
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ = delegate_->hit_intersect_section_ids_mask();
- return libtextclassifier3::Status::OK;
+ // Didn't find anything on the delegate iterator.
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index 9cee74c..5051607 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -27,7 +27,6 @@
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -57,7 +56,7 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
explicit DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Clock* clock, const Options& options);
+ const Options& options);
libtextclassifier3::Status Advance() override;
@@ -81,7 +80,6 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
const Options options_;
std::unordered_set<NamespaceId> target_namespace_ids_;
std::unordered_set<SchemaTypeId> target_schema_type_ids_;
- const int64_t current_time_milliseconds_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index e0a8cd0..f80d1ea 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -28,6 +28,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -59,10 +60,10 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
test_document3_ =
DocumentBuilder().SetKey("icing", "email/3").SetSchema("email").Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -100,9 +101,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -124,9 +125,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id3));
@@ -150,9 +151,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NonExistingDocumentsAreFiltered) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id3));
@@ -163,9 +164,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NegativeDocumentIdIsIgnored) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -177,9 +178,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, InvalidDocumentIdIsIgnored) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -194,9 +195,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, GreaterThanMaxDocumentIdIsIgnored) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -226,10 +227,10 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
.SetSchema("email")
.Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -270,9 +271,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, EmptyOriginalIterator) {
std::make_unique<DocHitInfoIteratorDummy>();
options_.namespaces = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -288,9 +289,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -305,9 +306,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, NoNamespacesReturnsAll) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -329,9 +330,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{namespace1_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2));
@@ -355,9 +356,9 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id4));
@@ -379,14 +380,12 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
document4_schema1_ =
DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(schema1_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema2_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema3_);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema3_))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -427,9 +426,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, EmptyOriginalIterator) {
std::make_unique<DocHitInfoIteratorDummy>();
options_.schema_types = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -446,9 +445,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
options_.schema_types =
std::vector<std::string_view>{"nonexistent_schema_type"};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -463,9 +462,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, NoSchemaTypesReturnsAll) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -484,9 +483,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{schema1_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -507,9 +506,9 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id2, document_id3));
@@ -523,10 +522,10 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(email_schema_);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(email_schema_))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -557,6 +556,16 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
};
TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
+ // Arbitrary value
+ fake_clock_.SetSystemTimeMilliseconds(100);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
@@ -565,23 +574,30 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
.SetTtlMs(0)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value
- fake_clock_.SetSystemTimeMilliseconds(100);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
+ // Arbitrary value, but must be less than document's creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(50);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
@@ -590,92 +606,84 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value, but must be less than document's creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(50);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
+ // Current time is exactly the document's creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(150);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema(email_schema_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(50)
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Current time is exactly the document's creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(100);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
+ // Arbitrary value, but must be greater than the document's
+ // creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(151);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema(email_schema_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(50)
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value, but must be greater than the document's
- // creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(101);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
-TEST_F(DocHitInfoIteratorExpirationFilterTest,
- InvalidTimeFiltersReturnsInternalError) {
- // Put something in the original iterator so we don't get a ResourceExhausted
- // error
- std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
- std::unique_ptr<DocHitInfoIterator> original_iterator =
- std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
-
- // -1 is an invalid timestamp
- fake_clock_.SetSystemTimeMilliseconds(-1);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
-
- EXPECT_THAT(filtered_iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
class DocHitInfoIteratorFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorFilterTest() : test_dir_(GetTestTempDir() + "/icing") {}
@@ -709,16 +717,15 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
document5_namespace1_schema1_ = DocumentBuilder()
.SetKey(namespace1_, "5")
.SetSchema(schema1_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(1)
.SetTtlMs(100)
.Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(schema1_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema2_);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -756,26 +763,36 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
};
TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
+ // Filters out document5 since it's expired
+ fake_clock_.SetSystemTimeMilliseconds(199);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
- document_store_->Put(document1_namespace1_schema1_));
+ document_store->Put(document1_namespace1_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
- document_store_->Put(document2_namespace1_schema1_));
+ document_store->Put(document2_namespace1_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id3,
- document_store_->Put(document3_namespace2_schema1_));
+ document_store->Put(document3_namespace2_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id4,
- document_store_->Put(document4_namespace1_schema2_));
+ document_store->Put(document4_namespace1_schema2_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id5,
- document_store_->Put(document5_namespace1_schema1_));
+ document_store->Put(document5_namespace1_schema1_));
// Deletes document2, causing it to be filtered out
ICING_ASSERT_OK(
- document_store_->Delete(document2_namespace1_schema1_.namespace_(),
- document2_namespace1_schema1_.uri()));
+ document_store->Delete(document2_namespace1_schema1_.namespace_(),
+ document2_namespace1_schema1_.uri()));
std::vector<DocHitInfo> doc_hit_infos = {
DocHitInfo(document_id1), DocHitInfo(document_id2),
@@ -793,13 +810,9 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
// Filters out document4 by schema type
options.schema_types = std::vector<std::string_view>{schema1_};
- // Filters out document5 since it's expired
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(199);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -830,9 +843,9 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
EXPECT_THAT(GetDocHitInfos(&filtered_iterator),
ElementsAre(EqualsDocHitInfo(document_id1, section_ids1),
@@ -845,9 +858,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) {
original_iterator->SetNumBlocksInspected(5);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5));
}
@@ -857,9 +870,9 @@ TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) {
original_iterator->SetNumLeafAdvanceCalls(6);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index e1ece5c..8fb3659 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -35,30 +35,29 @@ DocHitInfoIteratorNot::DocHitInfoIteratorNot(
DocHitInfoIteratorAllDocumentId(document_id_limit)) {}
libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
- if (!all_document_id_iterator_.Advance().ok()) {
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
+ while (all_document_id_iterator_.Advance().ok()) {
+ if (all_document_id_iterator_.doc_hit_info().document_id() <
+ to_be_excluded_->doc_hit_info().document_id()) {
+ // Since DocumentIds are returned from DocHitInfoIterators in decreasing
+ // order, we have passed the last NOT result if we're smaller than its
+ // DocumentId. Advance the NOT result if so.
+ to_be_excluded_->Advance().IgnoreError();
+ }
- if (all_document_id_iterator_.doc_hit_info().document_id() <
- to_be_excluded_->doc_hit_info().document_id()) {
- // Since DocumentIds are returned from DocHitInfoIterators in decreasing
- // order, we have passed the last NOT result if we're smaller than its
- // DocumentId. Advance the NOT result if so.
- to_be_excluded_->Advance().IgnoreError();
- }
+ if (all_document_id_iterator_.doc_hit_info().document_id() ==
+ to_be_excluded_->doc_hit_info().document_id()) {
+ // This is a NOT result, skip and Advance to the next result.
+ continue;
+ }
- if (all_document_id_iterator_.doc_hit_info().document_id() ==
- to_be_excluded_->doc_hit_info().document_id()) {
- // This is a NOT result, skip and Advance to the next result.
- return Advance();
+ // No errors, we've found a valid result
+ doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
+ return libtextclassifier3::Status::OK;
}
- // No errors, we've found a valid result
- doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
-
- return libtextclassifier3::Status::OK;
+ // Didn't find a hit, return with error
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index e6ee8e3..034c8cb 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -45,56 +45,54 @@ DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
target_section_(target_section) {}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
- if (!delegate_->Advance().ok()) {
- // Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
-
- DocumentId document_id = delegate_->doc_hit_info().document_id();
-
- SectionIdMask section_id_mask =
- delegate_->doc_hit_info().hit_section_ids_mask();
-
- auto data_or = document_store_.GetDocumentFilterData(document_id);
- if (!data_or.ok()) {
- // Ran into some error retrieving information on this hit, skip
- return Advance();
- }
+ while (delegate_->Advance().ok()) {
+ DocumentId document_id = delegate_->doc_hit_info().document_id();
- // Guaranteed that the DocumentFilterData exists at this point
- DocumentFilterData data = std::move(data_or).ValueOrDie();
- SchemaTypeId schema_type_id = data.schema_type_id();
+ SectionIdMask section_id_mask =
+ delegate_->doc_hit_info().hit_section_ids_mask();
- // A hit can be in multiple sections at once, need to check that at least one
- // of the confirmed section ids match the name of the target section
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctz(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
+ auto data_or = document_store_.GetDocumentFilterData(document_id);
+ if (!data_or.ok()) {
+ // Ran into some error retrieving information on this hit, skip
+ continue;
+ }
- if (section_metadata->path == target_section_) {
- // The hit was in the target section name, return OK/found
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ = 1u << section_id;
- return libtextclassifier3::Status::OK;
+ // Guaranteed that the DocumentFilterData exists at this point
+ DocumentFilterData data = std::move(data_or).ValueOrDie();
+ SchemaTypeId schema_type_id = data.schema_type_id();
+
+ // A hit can be in multiple sections at once, need to check that at least
+ // one of the confirmed section ids match the name of the target section
+ while (section_id_mask != 0) {
+ // There was a hit in this section id
+ SectionId section_id = __builtin_ctz(section_id_mask);
+
+ auto section_metadata_or =
+ schema_store_.GetSectionMetadata(schema_type_id, section_id);
+
+ if (section_metadata_or.ok()) {
+ const SectionMetadata* section_metadata =
+ section_metadata_or.ValueOrDie();
+
+ if (section_metadata->path == target_section_) {
+ // The hit was in the target section name, return OK/found
+ doc_hit_info_ = delegate_->doc_hit_info();
+ hit_intersect_section_ids_mask_ = 1u << section_id;
+ return libtextclassifier3::Status::OK;
+ }
}
+
+ // Mark this section as checked
+ section_id_mask &= ~(1U << section_id);
}
- // Mark this section as checked
- section_id_mask &= ~(1U << section_id);
+ // Didn't find a matching section name for this hit. Continue.
}
- // Didn't find a matching section name for this hit, go to the next hit
- return Advance();
+ // Didn't find anything on the delegate iterator.
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 21b3f8f..43a846b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -29,6 +29,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -47,6 +48,14 @@ using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::IsEmpty;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+
class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
protected:
DocHitInfoIteratorSectionRestrictTest()
@@ -57,18 +66,18 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
document_ =
DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build();
- auto type_config = schema_.add_types();
- type_config->set_schema_type("email");
-
- // Add an indexed property so we generate section metadata on it
- auto property = type_config->add_properties();
- property->set_property_name(indexed_property_);
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ schema_ = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(indexed_property_)
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// First and only indexed property, so it gets the first id of 0
indexed_section_id_ = 0;
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index afb298b..bf90202 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -66,6 +66,8 @@ class DocHitInfoIterator {
// Returns:
// OK if was able to advance to a new document_id.
+ // INVALID_ARGUMENT if there are less than 2 iterators for an AND/OR
+ // iterator
// RESOUCE_EXHAUSTED if we've run out of document_ids to iterate over
virtual libtextclassifier3::Status Advance() = 0;
diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
index 90e4888..f975989 100644
--- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
+++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
@@ -14,15 +14,15 @@
#include <vector>
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
-#include "testing/base/public/benchmark.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
namespace icing {
namespace lib {