aboutsummaryrefslogtreecommitdiff
path: root/icing/icing-search-engine_initialization_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/icing-search-engine_initialization_test.cc')
-rw-r--r--icing/icing-search-engine_initialization_test.cc676
1 files changed, 622 insertions, 54 deletions
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index b4853b4..122e4af 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -12,28 +12,39 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
+#include <string_view>
+#include <tuple>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/document-builder.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/file/version-util.h"
#include "icing/icing-search-engine.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/jni/jni-cache.h"
-#include "icing/join/doc-join-info.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -59,8 +70,12 @@
#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -71,6 +86,7 @@
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "icing/util/tokenized-document.h"
#include "unicode/uloc.h"
@@ -211,6 +227,8 @@ std::string GetHeaderFilename() {
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
return icing_options;
}
@@ -1040,12 +1058,14 @@ TEST_F(IcingSearchEngineInitializationTest,
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
{
// Initializes folder and schema, index one document
- TestIcingSearchEngine icing(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
- GetTestJniCache());
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
@@ -1064,7 +1084,9 @@ TEST_F(IcingSearchEngineInitializationTest,
DocumentStore::Create(
filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*namespace_id_fingerprint=*/
+ icing_options.document_store_namespace_id_fingerprint(),
+ /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
@@ -1102,8 +1124,7 @@ TEST_F(IcingSearchEngineInitializationTest,
HasSubstr("/qualified_id_join_index_dir/")))
.Times(0);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(), GetTestJniCache());
InitializeResultProto initialize_result = icing.Initialize();
@@ -1201,6 +1222,222 @@ TEST_F(IcingSearchEngineInitializationTest,
expected_join_search_result_proto));
}
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptedDocumentStore) {
+ // Test the following scenario: some document store derived files are
+ // corrupted. IcingSearchEngine should be able to recover the document store,
+ // and since NamespaceIds were reassigned, we should rebuild qualified id join
+ // index as well. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Should discard the entire qualified id join index directory and start
+ // it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ // - Still, we need to replay and reindex documents (for qualified id join
+ // index).
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto personDummy =
+ DocumentBuilder()
+ .SetKey("namespace2", "personDummy")
+ .SetSchema("Person")
+ .AddStringProperty("name", "personDummy")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace2", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace2#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // "namespace2" (in personDummy) will be assigned NamespaceId = 0.
+ EXPECT_THAT(icing.Put(personDummy).status(), ProtoIsOk());
+ // "namespace1" (in person1) will be assigned NamespaceId = 1.
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+ // Now delete personDummy.
+ EXPECT_THAT(
+ icing.Delete(personDummy.namespace_(), personDummy.uri()).status(),
+ ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Manually corrupt one of the derived files of DocumentStore without
+ // updating checksum in DocumentStore header.
+ std::string score_cache_filename = GetDocumentDir() + "/score_cache";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>>
+ score_cache,
+ FileBackedVector<DocumentAssociatedScoreData>::Create(
+ *filesystem(), std::move(score_cache_filename),
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(const DocumentAssociatedScoreData* score_data,
+ score_cache->Get(/*idx=*/0));
+ ICING_ASSERT_OK(score_cache->Set(
+ /*idx=*/0,
+ DocumentAssociatedScoreData(score_data->corpus_id(),
+ score_data->document_score() + 1,
+ score_data->creation_timestamp_ms(),
+ score_data->length_in_tokens())));
+ ICING_ASSERT_OK(score_cache->PersistToDisk());
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ // DocumentStore should be recovered. When reassigning NamespaceId, the order
+ // will be the document traversal order: [person1, person2, message].
+ // Therefore, "namespace1" will have id = 0 and "namespace2" will have id = 1.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ // Term, integer index should be unaffected.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Qualified id join index should be rebuilt.
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field. message2
+ // should be joined to person2 correctly.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+
+ *expected_join_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
// Test the following scenario: term index is corrupted (e.g. checksum doesn't
// match). IcingSearchEngine should be able to recover term index. Several
@@ -3696,16 +3933,18 @@ TEST_F(IcingSearchEngineInitializationTest,
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
// Add data for document 0.
ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
kInvalidDocumentId);
qualified_id_join_index->set_last_added_document_id(0);
ICING_ASSERT_OK(qualified_id_join_index->Put(
- DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0),
- /*ref_qualified_id_str=*/"namespace#person"));
+ /*schema_type_id=*/0, /*joinable_property_id=*/0, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*target_str=*/"uri")}));
}
// 3. Create the index again. This should trigger index restoration.
@@ -3766,12 +4005,14 @@ TEST_F(IcingSearchEngineInitializationTest,
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
- EXPECT_THAT(qualified_id_join_index->Get(
- DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto iterator, qualified_id_join_index->GetIterator(
+ /*schema_type_id=*/0, /*joinable_property_id=*/0));
+ EXPECT_THAT(iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
}
@@ -3855,7 +4096,6 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
- DocJoinInfo additional_data_key;
// 2. Manually add some data into qualified id join index and increment
// last_added_document_id. This will cause mismatched document id with
// document store.
@@ -3867,20 +4107,20 @@ TEST_F(IcingSearchEngineInitializationTest,
Filesystem filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false,
- /*use_persistent_hash_map=*/false));
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
// Add data for document 4.
DocumentId original_last_added_doc_id =
qualified_id_join_index->last_added_document_id();
qualified_id_join_index->set_last_added_document_id(
original_last_added_doc_id + 1);
- additional_data_key =
- DocJoinInfo(/*document_id=*/original_last_added_doc_id + 1,
- /*joinable_property_id=*/0);
ICING_ASSERT_OK(qualified_id_join_index->Put(
- additional_data_key,
- /*ref_qualified_id_str=*/"namespace#person"));
+ /*schema_type_id=*/1, /*joinable_property_id=*/0,
+ /*document_id=*/original_last_added_doc_id + 1,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*target_str=*/"person")}));
}
// 3. Create the index again. This should trigger index restoration.
@@ -4288,9 +4528,12 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(
initialize_result_proto.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::PARTIAL_LOSS));
- // Since document store rewinds to previous checkpoint, last stored doc id
- // will be consistent with last added document ids in term/integer indices,
- // so there will be no index restoration.
+ // Document store rewinds to previous checkpoint and all derived files were
+ // regenerated.
+ // - Last stored doc id will be consistent with last added document ids in
+ // term/integer indices, so there will be no index restoration.
+ // - Qualified id join index depends on document store derived files and
+ // since they were regenerated, we should rebuild qualified id join index.
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
Eq(InitializeStatsProto::NONE));
@@ -4299,10 +4542,10 @@ TEST_F(IcingSearchEngineInitializationTest,
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.qualified_id_join_index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
- Eq(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
Eq(InitializeStatsProto::NONE));
@@ -4954,7 +5197,7 @@ TEST_F(IcingSearchEngineInitializationTest,
auto mock_filesystem = std::make_unique<MockFilesystem>();
EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _))
.WillRepeatedly(DoDefault());
- // This fails QualifiedIdJoinIndex::Create() once.
+ // This fails QualifiedIdJoinIndexImplV2::Create() once.
EXPECT_CALL(
*mock_filesystem,
PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _,
@@ -5054,10 +5297,10 @@ TEST_F(IcingSearchEngineInitializationTest,
Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.qualified_id_join_index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(0));
+ Eq(10));
EXPECT_THAT(
initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
Eq(InitializeStatsProto::NONE));
@@ -5160,6 +5403,169 @@ TEST_F(IcingSearchEngineInitializationTest,
}
}
+// TODO(b/275121148): deprecate this test after rollout join index v2.
+class IcingSearchEngineInitializationSwitchJoinIndexTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<bool> {};
+TEST_P(IcingSearchEngineInitializationSwitchJoinIndexTest, SwitchJoinIndex) {
+ bool use_join_index_v2 = GetParam();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with message 3 documents.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(use_join_index_v2);
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again changing join index version. This should trigger
+ // join index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(!use_join_index_v2);
+
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(IcingSearchEngineInitializationSwitchJoinIndexTest,
+ IcingSearchEngineInitializationSwitchJoinIndexTest,
+ testing::Values(true, false));
+
class IcingSearchEngineInitializationVersionChangeTest
: public IcingSearchEngineInitializationTest,
public ::testing::WithParamInterface<version_util::VersionInfo> {};
@@ -5219,12 +5625,14 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
{
// Initializes folder and schema, index person1 and person2
- TestIcingSearchEngine icing(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
- GetTestJniCache());
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
@@ -5251,7 +5659,9 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
DocumentStore::Create(
filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*namespace_id_fingerprint=*/
+ icing_options.document_store_namespace_id_fingerprint(),
+ /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
@@ -5276,25 +5686,26 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
- QualifiedIdJoinIndex::Create(
- *filesystem(), GetQualifiedIdJoinIndexDir(),
- /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false));
+ QualifiedIdJoinIndexImplV2::Create(*filesystem(),
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<StringSectionIndexingHandler>
- string_section_indexing_handler,
- StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(),
- index.get()));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
integer_section_indexing_handler,
IntegerSectionIndexingHandler::Create(
&fake_clock, integer_index.get()));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
- qualified_id_join_indexing_handler,
- QualifiedIdJoinIndexingHandler::Create(
- &fake_clock, qualified_id_join_index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock, document_store.get(), qualified_id_join_index.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
- handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(term_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
handlers.push_back(std::move(qualified_id_join_indexing_handler));
IndexProcessor index_processor(std::move(handlers), &fake_clock);
@@ -5457,6 +5868,163 @@ INSTANTIATE_TEST_SUITE_P(
/*version_in=*/0,
/*max_version_in=*/version_util::kVersion)));
+class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<std::tuple<bool, bool>> {};
+TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ ChangePropertyExistenceHitsFlagTest) {
+ bool before_build_property_existence_metadata_hits = std::get<0>(GetParam());
+ bool after_build_property_existence_metadata_hits = std::get<1>(GetParam());
+ bool flag_changed = before_build_property_existence_metadata_hits !=
+ after_build_property_existence_metadata_hits;
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Value")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("score")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create a document with every property.
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+
+ // 1. Create an index with the 3 documents.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(
+ before_build_property_existence_metadata_hits);
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again with
+ // after_build_property_existence_metadata_hits.
+ //
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure that the term index is rebuilt if the flag is changed.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(flag_changed ? 1 : 0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(
+ after_build_property_existence_metadata_hits);
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Ensure that the term index is rebuilt if the flag is changed.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(flag_changed ? InitializeStatsProto::IO_ERROR
+ : InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Get all documents that have "body".
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ search_spec.set_query("hasProperty(\"body\")");
+ SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+
+ // Get all documents that have "timestamp".
+ search_spec.set_query("hasProperty(\"timestamp\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+
+ // Get all documents that have "score".
+ search_spec.set_query("hasProperty(\"score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ testing::Values(std::make_tuple(false, false), std::make_tuple(false, true),
+ std::make_tuple(true, false), std::make_tuple(true, true)));
+
} // namespace
} // namespace lib
} // namespace icing