aboutsummaryrefslogtreecommitdiff
path: root/icing/index
diff options
context:
space:
mode:
author Tim Barron <tjbarron@google.com> 2023-05-11 06:22:44 +0000
committer Tim Barron <tjbarron@google.com> 2023-05-11 15:45:06 +0000
commit fc9e6aac9c62d4546cb25548e1bbb317b7a4fd9a (patch)
tree 061f4d194144ae4d5f81e6b56c148ddb5495e694 /icing/index
parent a7d57e98ea7168d66cf01ace85598e33d5e9e5db (diff)
download icing-fc9e6aac9c62d4546cb25548e1bbb317b7a4fd9a.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== Modify the definition of propertyDefined: ======================================================================== Remove default args in SchemaStore::SetSchema and fix calls ======================================================================== Add allow_circular_schema_definitions flag ======================================================================== Onboard version detection to Icing ======================================================================== Add version util to help read/write version info ======================================================================== Add support for the overlay schema. ======================================================================== Allow cycles in schema-property-iterator ======================================================================== Add joinable properties into schema definition cycle restrictions. ======================================================================== Loosen circular references restriction for Schema Definitions. 
======================================================================== Implement BackupSchemaProducer to generate a backup schema ======================================================================== Minor fix: remove a redundant log ======================================================================== Allow schema types to inherit from more than one parent ======================================================================== allow nested document properties to accept documents of subtype ======================================================================== Support polymorphism for Icing projection in Search and Get API ======================================================================== Add max_joined_child_per_parent into ResultSpec and change behavior ======================================================================== Support Icing schema type polymorphism for the search filter API ======================================================================== Verify that every child type's property set has included all compatible properties from parent types ======================================================================== Add individual type index latency ======================================================================== Build the iterator node for the propertyDefined() custom function ======================================================================== Advance all hits with same doc id from and merge sections once for the same bucket iter ======================================================================== Introduce DocHitInfoIteratorPropertyInSchema for property existence check ======================================================================== Add SchemaUtil::BuildTransitiveInheritanceGraph to build an inheritance map from schema ======================================================================== Introduce a lookup method for a property defined in a schema 
======================================================================== Rollback of: Allow LanguageSegmenter::Iterators to declare AccessType. ======================================================================== Adds join info to QueryStatsProto ======================================================================== Bug:280698419 Bug:280698125 Bug:280698121 Bug:280697513 Bug:276349029 Bug:272145329 Bug:270102295 Bug:269295094 Bug:268680462 Bug:265304217 Bug:259744228 Bug:259743562 Bug:256022027 Change-Id: I54cd1d22121c314f8c238d2d49f0809165dc0ca3
Diffstat (limited to 'icing/index')
-rw-r--r-- icing/index/index-processor_benchmark.cc 4
-rw-r--r-- icing/index/index-processor_test.cc 18
-rw-r--r-- icing/index/index.cc 6
-rw-r--r-- icing/index/index.h 10
-rw-r--r-- icing/index/integer-section-indexing-handler.cc 14
-rw-r--r-- icing/index/integer-section-indexing-handler_test.cc 4
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-filter.cc 11
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-filter_test.cc 174
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc 114
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-property-in-schema.h 76
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc 263
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc 4
-rw-r--r-- icing/index/iterator/doc-hit-info-iterator.h 7
-rw-r--r-- icing/index/main/main-index.cc 13
-rw-r--r-- icing/index/main/main-index.h 10
-rw-r--r-- icing/index/numeric/integer-index-storage.cc 11
-rw-r--r-- icing/index/numeric/integer-index-storage_benchmark.cc 124
-rw-r--r-- icing/index/numeric/integer-index_test.cc 20
-rw-r--r-- icing/index/string-section-indexing-handler.cc 5
19 files changed, 837 insertions, 51 deletions
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index ee43364..1cbe00d 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -172,7 +172,9 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
SchemaProto schema;
CreateFakeTypeConfig(schema.add_types());
- auto set_schema_status = schema_store->SetSchema(schema);
+ auto set_schema_status = schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
if (!set_schema_status.ok()) {
ICING_LOG(ERROR) << set_schema_status.status().error_message();
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 9453e58..ed9e856 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -40,7 +40,7 @@
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/string-section-indexing-handler.h"
#include "icing/index/term-property-id.h"
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
@@ -267,7 +267,9 @@ class IndexProcessorTest : public Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -291,10 +293,10 @@ class IndexProcessorTest : public Test {
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
qualified_id_joinable_property_indexing_handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
handlers.push_back(std::move(string_section_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
@@ -823,10 +825,10 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
qualified_id_joinable_property_indexing_handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
handlers.push_back(std::move(string_section_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 5cfcd27..19edbb6 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -163,6 +163,12 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
std::move(main_index), filesystem));
}
+/* static */ libtextclassifier3::StatusOr<int> Index::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ return MainIndex::ReadFlashIndexMagic(filesystem,
+ MakeMainIndexFilepath(base_dir));
+}
+
libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
if (lite_index_->last_added_document_id() != kInvalidDocumentId &&
lite_index_->last_added_document_id() > document_id) {
diff --git a/icing/index/index.h b/icing/index/index.h
index 3200d70..c170278 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -86,6 +86,16 @@ class Index {
const Options& options, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
+ // Reads magic from existing flash (main) index file header. We need this
+ // during Icing initialization phase to determine the version.
+ //
+ // Returns
+ // Valid magic on success
+ // NOT_FOUND if the flash (main) index doesn't exist
+ // INTERNAL on I/O error
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir);
+
// Clears all files created by the index. Returns OK if all files were
// cleared.
libtextclassifier3::Status Reset() {
diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc
index 584f028..63b09df 100644
--- a/icing/index/integer-section-indexing-handler.cc
+++ b/icing/index/integer-section-indexing-handler.cc
@@ -16,12 +16,19 @@
#include <cstdint>
#include <memory>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
@@ -41,7 +48,7 @@ IntegerSectionIndexingHandler::Create(const Clock* clock,
libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
- // TODO(b/259744228): set integer indexing latency and other stats
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (!IsDocumentIdValid(document_id)) {
return absl_ports::InvalidArgumentError(
@@ -93,6 +100,11 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
}
}
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_integer_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
return status;
}
diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc
index 895fe57..706856c 100644
--- a/icing/index/integer-section-indexing-handler_test.cc
+++ b/icing/index/integer-section-indexing-handler_test.cc
@@ -156,7 +156,9 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ASSERT_TRUE(
filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index 83a73a4..2c0c2c2 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -55,11 +55,16 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
// Precompute all the SchemaTypeIds
for (std::string_view schema_type : options_.schema_types) {
- auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ schema_type_ids_or =
+ schema_store_.GetSchemaTypeIdsWithChildren(schema_type);
// If we can't find the SchemaTypeId, just throw it away
- if (schema_type_id_or.ok()) {
- target_schema_type_ids_.emplace(schema_type_id_or.ValueOrDie());
+ if (schema_type_ids_or.ok()) {
+ const std::unordered_set<SchemaTypeId>* schema_type_ids =
+ schema_type_ids_or.ValueOrDie();
+ target_schema_type_ids_.insert(schema_type_ids->begin(),
+ schema_type_ids->end());
}
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index 0900e1f..4b86cae 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -17,6 +17,7 @@
#include <limits>
#include <memory>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -80,7 +81,9 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -247,7 +250,9 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -379,30 +384,52 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
protected:
+ static constexpr std::string_view kSchema1 = "email";
+ static constexpr std::string_view kSchema2 = "message";
+ static constexpr std::string_view kSchema3 = "person";
+ static constexpr std::string_view kSchema4 = "artist";
+ static constexpr std::string_view kSchema5 = "emailMessage";
+
DocHitInfoIteratorSchemaTypeFilterTest()
: test_dir_(GetTestTempDir() + "/icing") {}
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document1_schema1_ =
- DocumentBuilder().SetKey("namespace", "1").SetSchema(schema1_).Build();
- document2_schema2_ =
- DocumentBuilder().SetKey("namespace", "2").SetSchema(schema2_).Build();
- document3_schema3_ =
- DocumentBuilder().SetKey("namespace", "3").SetSchema(schema3_).Build();
- document4_schema1_ =
- DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
+ document1_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema(std::string(kSchema1))
+ .Build();
+ document2_schema2_ = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema(std::string(kSchema2))
+ .Build();
+ document3_schema3_ = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema(std::string(kSchema3))
+ .Build();
+ document4_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "4")
+ .SetSchema(std::string(kSchema1))
+ .Build();
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
- .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
- .AddType(SchemaTypeConfigBuilder().SetType(schema3_))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema1))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema2))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema3))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema4).AddParentType(
+ kSchema3))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(std::string(kSchema5))
+ .AddParentType(kSchema1)
+ .AddParentType(kSchema2))
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -424,9 +451,6 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
FakeClock fake_clock_;
const Filesystem filesystem_;
const std::string test_dir_;
- const std::string schema1_ = "email";
- const std::string schema2_ = "message";
- const std::string schema3_ = "person";
DocumentProto document1_schema1_;
DocumentProto document2_schema2_;
DocumentProto document3_schema3_;
@@ -495,7 +519,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema1_};
+ options_.schema_types = std::vector<std::string_view>{kSchema1};
DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
document_store_.get(),
schema_store_.get(), options_);
@@ -518,7 +542,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
+ options_.schema_types = std::vector<std::string_view>{kSchema2, kSchema3};
DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
document_store_.get(),
schema_store_.get(), options_);
@@ -527,6 +551,110 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
ElementsAre(document_id2, document_id3));
}
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypePolymorphismOk) {
+ // Add some irrelevant documents.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_schema2_));
+
+ // Create a person document and an artist document, where the artist should be
+ // able to be interpreted as a person by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("person")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId artist_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "artist")
+ .SetSchema("artist")
+ .Build()));
+
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id1), DocHitInfo(document_id2),
+ DocHitInfo(person_document_id), DocHitInfo(artist_document_id)};
+
+ // Filters for the "person" type should also include the "artist" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"person"};
+ DocHitInfoIteratorFilter filtered_iterator_1(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(person_document_id, artist_document_id));
+
+ // Filters for the "artist" type should not include the "person" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"artist"};
+ DocHitInfoIteratorFilter filtered_iterator_2(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(artist_document_id));
+}
+
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypeMultipleParentPolymorphismOk) {
+ // Create an email and a message document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("message")
+ .Build()));
+
+ // Create an emailMessage document, which should be able to be interpreted
+ // as both an email and a message by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "emailMessage")
+ .SetSchema("emailMessage")
+ .Build()));
+
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(email_document_id), DocHitInfo(message_document_id),
+ DocHitInfo(email_message_document_id)};
+
+ // Filters for the "email" type should also include the "emailMessage" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"email"};
+ DocHitInfoIteratorFilter filtered_iterator_1(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(email_document_id, email_message_document_id));
+
+ // Filters for the "message" type should also include the "emailMessage" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"message"};
+ DocHitInfoIteratorFilter filtered_iterator_2(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(message_document_id, email_message_document_id));
+
+ // Filters for an irrelevant type should return nothing.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"person"};
+ DocHitInfoIteratorFilter filtered_iterator_3(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_3), IsEmpty());
+}
+
class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorExpirationFilterTest()
@@ -542,7 +670,9 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -742,7 +872,9 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
new file mode 100644
index 0000000..5f260a8
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
@@ -0,0 +1,114 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ target_properties_(std::move(target_sections)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() {
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+
+ // Maps from SchemaTypeId to a bool indicating whether or not the type has
+ // the requested property.
+ std::unordered_map<SchemaTypeId, bool> property_defined_types;
+ while (delegate_->Advance().ok()) {
+ DocumentId document_id = delegate_->doc_hit_info().document_id();
+ auto data_optional =
+ document_store_.GetAliveDocumentFilterData(document_id);
+ if (!data_optional) {
+ // Ran into some error retrieving information on this hit, skip
+ continue;
+ }
+
+ // Guaranteed that the DocumentFilterData exists at this point
+ SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
+ bool valid_match = false;
+ auto itr = property_defined_types.find(schema_type_id);
+ if (itr != property_defined_types.end()) {
+ valid_match = itr->second;
+ } else {
+ for (const auto& property : target_properties_) {
+ if (schema_store_.IsPropertyDefinedInSchema(schema_type_id, property)) {
+ valid_match = true;
+ break;
+ }
+ }
+ property_defined_types[schema_type_id] = valid_match;
+ }
+
+ if (valid_match) {
+ doc_hit_info_ = delegate_->doc_hit_info();
+ hit_intersect_section_ids_mask_ =
+ delegate_->hit_intersect_section_ids_mask();
+ doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+ return libtextclassifier3::Status::OK;
+ }
+
+ // The document's schema does not define any properties listed in
+ // target_properties_. Continue.
+ }
+
+ // Didn't find anything on the delegate iterator.
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && {
+ // Don't generate suggestion if the last operator is this custom function.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is hasPropertyDefined().");
+}
+
+int32_t DocHitInfoIteratorPropertyInSchema::GetNumBlocksInspected() const {
+ return delegate_->GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorPropertyInSchema::GetNumLeafAdvanceCalls() const {
+ return delegate_->GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorPropertyInSchema::ToString() const {
+ return absl_ports::StrCat("(", absl_ports::StrJoin(target_properties_, ","),
+ "): ", delegate_->ToString());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
new file mode 100644
index 0000000..35b87e1
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
@@ -0,0 +1,76 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// An iterator that helps filter for DocHitInfos whose schemas define the
+// properties named in target_properties_.
+class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator {
+ public:
+ // Takes ownership of delegate only; the raw pointers must refer to valid
+ // objects that outlive the one constructed. The delegate should at minimum
+ // be a DocHitInfoIteratorAllDocumentId, but other optimizations are possible,
+ // cf. go/icing-property-in-schema-existence.
+ explicit DocHitInfoIteratorPropertyInSchema(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections);
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ int32_t GetNumBlocksInspected() const override;
+
+ int32_t GetNumLeafAdvanceCalls() const override;
+
+ std::string ToString() const override;
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> delegate_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+
+ std::set<std::string> target_properties_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
new file mode 100644
index 0000000..9bffeeb
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
@@ -0,0 +1,263 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+class DocHitInfoIteratorPropertyInSchemaTest : public ::testing::Test {
+ protected:
+ DocHitInfoIteratorPropertyInSchemaTest()
+ : test_dir_(GetTestTempDir() + "/icing") {}
+
+ void SetUp() override {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ document1_ = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .Build();
+ document2_ =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("note").Build();
+
+ indexed_section_0 = "indexedSection0";
+ unindexed_section_1 = "unindexedSection1";
+ not_defined_section_2 = "notDefinedSection2";
+
+ schema_ =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Give "email" one indexed string property so that section
+ // metadata is generated for it.
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_0)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(unindexed_section_1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("note").AddProperty(
+ PropertyConfigBuilder()
+ .SetName(unindexed_section_1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::string indexed_section_0; // TERM_MATCH_EXACT string property on "email"
+ std::string unindexed_section_1; // TYPE_STRING property on "email" and "note"
+ std::string not_defined_section_2; // not added to any type in schema_
+ SchemaProto schema_;
+ DocumentProto document1_;
+ DocumentProto document2_;
+ FakeClock fake_clock_;
+};
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ AdvanceToDocumentWithIndexedProperty) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0});
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ AdvanceToDocumentWithUnindexedProperty) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{unindexed_section_1});
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest, NoMatchWithUndefinedProperty) {
+ ICING_EXPECT_OK(document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{not_defined_section_2});
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ CorrectlySetsSectionIdMasksAndPopulatesTermMatchInfo) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_intersect_section_ids_mask(
+ original_section_id_mask);
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0});
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(property_defined_iterator.Advance());
+ EXPECT_THAT(property_defined_iterator.doc_hit_info().document_id(),
+ Eq(document_id));
+
+ // The expected mask is the same as the original mask, since the iterator
+ // should treat it as a pass-through.
+ SectionIdMask expected_section_id_mask = original_section_id_mask;
+ EXPECT_EQ(property_defined_iterator.hit_intersect_section_ids_mask(),
+ expected_section_id_mask);
+
+ property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{0, 1}, {2, 2}};
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map)));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ TrimRightMostNodeResultsInError) {
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{indexed_section_0});
+
+ EXPECT_THAT(std::move(property_defined_iterator).TrimRightMostNode(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ FindPropertyDefinedByMultipleTypes) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_sections=*/{unindexed_section_1});
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id2, document_id1));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 60b9a12..78f4d34 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -95,7 +95,9 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index e1f06d0..d8cd3ad 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -85,10 +85,11 @@ class DocHitInfoIterator {
unnormalized_term_length_(unnormalized_term_length) {}
};
- // Trim the right-most itertor of the itertor tree.
- // This is to support search suggestion for the last terms which is the
+ // Trim the rightmost iterator of the iterator tree.
+ // This is to support search suggestions for the last term which is the
// right-most node of the root iterator tree. Only support trim the right-most
- // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF and Filter itertor.
+ // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF, Filter, and the
+ // property-in-schema-check iterator.
//
// After calling this method, this iterator is no longer usable. Please use
// the returned iterator.
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 7df137c..d5e9d57 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -22,6 +22,7 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/destructible-directory.h"
+#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/index/term-id-codec.h"
@@ -90,6 +91,10 @@ FindTermResult FindShortestValidTermWithPrefixHits(
return result;
}
+std::string MakeFlashIndexFilename(const std::string& base_dir) {
+ return base_dir + "/main_index";
+}
+
} // namespace
MainIndex::MainIndex(const std::string& index_directory,
@@ -112,12 +117,18 @@ libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create(
return main_index;
}
+/* static */ libtextclassifier3::StatusOr<int> MainIndex::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory) {
+ return FlashIndexStorage::ReadHeaderMagic(
+ filesystem, MakeFlashIndexFilename(index_directory));
+}
+
// TODO(b/139087650) : Migrate off of IcingFilesystem.
libtextclassifier3::Status MainIndex::Init() {
if (!filesystem_->CreateDirectoryRecursively(base_dir_.c_str())) {
return absl_ports::InternalError("Unable to create main index directory.");
}
- std::string flash_index_file = base_dir_ + "/main_index";
+ std::string flash_index_file = MakeFlashIndexFilename(base_dir_);
ICING_ASSIGN_OR_RETURN(
FlashIndexStorage flash_index,
FlashIndexStorage::Create(flash_index_file, filesystem_,
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index e181330..9e570d5 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -48,6 +48,16 @@ class MainIndex {
const std::string& index_directory, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
+ // Reads magic from existing flash index storage file header. We need this
+ // during Icing initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic.
+ // - NOT_FOUND if the flash index doesn't exist.
+ // - INTERNAL on I/O error.
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory);
+
// Get a PostingListHitAccessor that holds the posting list chain for 'term'.
//
// RETURNS:
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index f3901e1..5165040 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -292,12 +292,17 @@ libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
// Merge sections with same document_id into a single DocHitInfo
while (!pq_.empty() &&
pq_.top()->GetCurrentBasicHit().document_id() == document_id) {
- doc_hit_info_.UpdateSection(pq_.top()->GetCurrentBasicHit().section_id());
-
BucketPostingListIterator* bucket_itr = pq_.top();
pq_.pop();
- if (bucket_itr->AdvanceAndFilter(key_lower_, key_upper_).ok()) {
+ libtextclassifier3::Status advance_status;
+ do {
+ doc_hit_info_.UpdateSection(
+ bucket_itr->GetCurrentBasicHit().section_id());
+ advance_status = bucket_itr->AdvanceAndFilter(key_lower_, key_upper_);
+ } while (advance_status.ok() &&
+ bucket_itr->GetCurrentBasicHit().document_id() == document_id);
+ if (advance_status.ok()) {
pq_.push(bucket_itr);
}
}
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
index 54b19c3..27f35d9 100644
--- a/icing/index/numeric/integer-index-storage_benchmark.cc
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -12,22 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <algorithm>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/destructible-directory.h"
#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/numeric/normal-distribution-number-generator.h"
#include "icing/testing/numeric/number-generator.h"
#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
#include "icing/testing/tmp-directory.h"
@@ -65,6 +73,7 @@ static constexpr int kDefaultSeed = 12345;
enum DistributionTypeEnum {
kUniformDistribution,
+ kNormalDistribution,
};
class IntegerIndexStorageBenchmark {
@@ -103,6 +112,19 @@ CreateIntegerGenerator(DistributionTypeEnum distribution_type, int seed,
return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
seed, /*range_lower=*/0,
/*range_upper=*/static_cast<int64_t>(num_keys) * 10 - 1);
+ case DistributionTypeEnum::kNormalDistribution:
+ // Normal distribution with mean = 0 and stddev = num_keys / 1024.
+ // - keys in range [-1 * stddev, 1 * stddev]: 68.2%
+ // - keys in range [-2 * stddev, 2 * stddev]: 95.4%
+ // - keys in range [-3 * stddev, 3 * stddev]: 99.7%
+ //
+ // - When generating num_keys integers, 68.2% of them will be in range
+ // [-num_keys / 1024, num_keys / 1024]
+ // - Each number in this range will be sampled (num_keys * 0.682) /
+ // ((num_keys / 1024) * 2) = 349 times on average and become
+ // "single-range bucket".
+ return std::make_unique<NormalDistributionNumberGenerator<int64_t>>(
+ seed, /*mean=*/0.0, /*stddev=*/num_keys / 1024.0);
default:
return absl_ports::InvalidArgumentError("Unknown type");
}
@@ -155,7 +177,18 @@ BENCHMARK(BM_Index)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
- ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
void BM_BatchIndex(benchmark::State& state) {
DistributionTypeEnum distribution_type =
@@ -203,7 +236,18 @@ BENCHMARK(BM_BatchIndex)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
- ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
void BM_ExactQuery(benchmark::State& state) {
DistributionTypeEnum distribution_type =
@@ -269,7 +313,81 @@ BENCHMARK(BM_ExactQuery)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
- ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_RangeQueryAll(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ IntegerIndexStorageBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str()); // clear any existing working directory
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(),
+ &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ for (int i = 0; i < num_keys; ++i) { // one generated key per document id
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i),
+ kDefaultSectionId,
+ {generator->Generate()}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ for (auto _ : state) { // each iteration runs one query over all of int64_t
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ storage->GetIterator(
+ /*query_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*query_key_upper=*/std::numeric_limits<int64_t>::max()));
+ std::vector<DocHitInfo> data;
+ while (iterator->Advance().ok()) {
+ data.push_back(iterator->doc_hit_info());
+ }
+
+ ASSERT_THAT(data, SizeIs(num_keys));
+ }
+}
+BENCHMARK(BM_RangeQueryAll)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
} // namespace
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
index ec7f55b..92433e1 100644
--- a/icing/index/numeric/integer-index_test.cc
+++ b/icing/index/numeric/integer-index_test.cc
@@ -389,7 +389,10 @@ TYPED_TEST(NumericIndexIntegerTest, WildcardStorageQuery) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("desiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Put 11 docs of "TypeA" into the document store.
DocumentProto doc =
@@ -1492,7 +1495,10 @@ TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("desiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Ids are assigned alphabetically, so the property ids are:
// TypeA.desiredProperty = 0
@@ -1862,7 +1868,10 @@ TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("desiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Ids are assigned alphabetically, so the property ids are:
// TypeA.desiredProperty = 0
@@ -2145,7 +2154,10 @@ TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("undesiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Ids are assigned alphabetically, so the property ids are:
// TypeA.desiredProperty = 0
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index a992568..69b8889 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -30,6 +30,8 @@
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
@@ -121,7 +123,8 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
}
if (put_document_stats != nullptr) {
- // TODO(b/259744228): set term index latency.
+ put_document_stats->set_term_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
num_tokens);
}