// Copyright (C) 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include "icing/text_classifier/lib3/utils/base/status.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" #include "icing/file/version-util.h" #include "icing/icing-search-engine.h" #include "icing/index/index-processor.h" #include "icing/index/index.h" #include "icing/index/integer-section-indexing-handler.h" #include "icing/index/numeric/integer-index.h" #include "icing/index/string-section-indexing-handler.h" #include "icing/jni/jni-cache.h" #include "icing/join/doc-join-info.h" #include "icing/join/join-processor.h" #include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/join/qualified-id-type-joinable-index.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" #include "icing/portable/endian.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/initialize.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/optimize.pb.h" #include "icing/proto/persist.pb.h" #include "icing/proto/reset.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/proto/usage.pb.h" #include "icing/query/query-features.h" #include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/store/document-id.h" #include "icing/store/document-log-creator.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" #include "icing/util/tokenized-document.h" #include "unicode/uloc.h" namespace icing { namespace lib { namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::_; using ::testing::AtLeast; using ::testing::DoDefault; using ::testing::EndsWith; using ::testing::Eq; using ::testing::HasSubstr; using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::Return; using ::testing::SizeIs; constexpr std::string_view kIpsumText = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis " "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida " "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam " "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo " "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, " "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula " "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et " "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, " "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis " "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. " "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. " "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur " "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh " "placerat semper."; PortableFileBackedProtoLog::Header ReadDocumentLogHeader( Filesystem filesystem, const std::string& file_path) { PortableFileBackedProtoLog::Header header; filesystem.PRead(file_path.c_str(), &header, sizeof(PortableFileBackedProtoLog::Header), /*offset=*/0); return header; } void WriteDocumentLogHeader( Filesystem filesystem, const std::string& file_path, PortableFileBackedProtoLog::Header& header) { filesystem.Write(file_path.c_str(), &header, sizeof(PortableFileBackedProtoLog::Header)); } // For mocking purpose, we allow tests to provide a custom Filesystem. class TestIcingSearchEngine : public IcingSearchEngine { public: TestIcingSearchEngine(const IcingSearchEngineOptions& options, std::unique_ptr filesystem, std::unique_ptr icing_filesystem, std::unique_ptr clock, std::unique_ptr jni_cache) : IcingSearchEngine(options, std::move(filesystem), std::move(icing_filesystem), std::move(clock), std::move(jni_cache)) {} }; std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } // This test is meant to cover all tests relating to // IcingSearchEngine::Initialize. class IcingSearchEngineInitializationTest : public testing::Test { protected: void SetUp() override { if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { // If we've specified using the reverse-JNI method for segmentation (i.e. // not ICU), then we won't have the ICU data file included to set up. // Technically, we could choose to use reverse-JNI for segmentation AND // include an ICU data file, but that seems unlikely and our current BUILD // setup doesn't do this. // File generated via icu_data_file rule in //icing/BUILD. std::string icu_data_file_path = GetTestFilePath("icing/icu.dat"); ICING_ASSERT_OK( icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); } filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); ICING_ASSERT_OK_AND_ASSIGN( lang_segmenter_, language_segmenter_factory::Create(std::move(segmenter_options))); ICING_ASSERT_OK_AND_ASSIGN( normalizer_, normalizer_factory::Create( /*max_term_byte_size=*/std::numeric_limits::max())); } void TearDown() override { normalizer_.reset(); lang_segmenter_.reset(); filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); } const Filesystem* filesystem() const { return &filesystem_; } const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; } Filesystem filesystem_; IcingFilesystem icing_filesystem_; std::unique_ptr lang_segmenter_; std::unique_ptr normalizer_; }; // Non-zero value so we don't override it to be the current time constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; } std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; } std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; } std::string GetIntegerIndexDir() { return GetTestBaseDir() + "/integer_index_dir"; } std::string GetQualifiedIdJoinIndexDir() { return GetTestBaseDir() + "/qualified_id_join_index_dir"; } std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; } std::string GetHeaderFilename() { return GetTestBaseDir() + "/icing_search_engine_header"; } IcingSearchEngineOptions GetDefaultIcingOptions() { IcingSearchEngineOptions icing_options; icing_options.set_base_dir(GetTestBaseDir()); return icing_options; } DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { return DocumentBuilder() .SetKey(std::move(name_space), std::move(uri)) .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); } DocumentProto CreateEmailDocument(const std::string& name_space, const std::string& uri, int score, const std::string& subject_content, const std::string& body_content) { return DocumentBuilder() .SetKey(name_space, uri) .SetSchema("Email") .SetScore(score) .AddStringProperty("subject", subject_content) .AddStringProperty("body", body_content) .Build(); } SchemaTypeConfigProto CreateMessageSchemaTypeConfig() { return SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .Build(); } SchemaTypeConfigProto CreateEmailSchemaTypeConfig() { return SchemaTypeConfigBuilder() .SetType("Email") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("subject") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .Build(); } SchemaProto CreateMessageSchema() { return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build(); } SchemaProto CreateEmailSchema() { return SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build(); } ScoringSpecProto GetDefaultScoringSpec() { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); return scoring_spec; } // TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder, // SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all // instances by them. TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); SchemaProto email_schema = CreateMessageSchema(); EXPECT_THAT(icing.SetSchema(email_schema).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.GetSchema().status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); DocumentProto doc = CreateMessageDocument("namespace", "uri"); EXPECT_THAT(icing.Put(doc).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing .Get(doc.namespace_(), doc.uri(), GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type()) .status() .code(), Eq(StatusProto::FAILED_PRECONDITION)); SearchSpecProto search_spec = SearchSpecProto::default_instance(); ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance(); ResultSpecProto result_spec = ResultSpecProto::default_instance(); EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); constexpr int kSomePageToken = 12; EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash. EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); } TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); DocumentProto document = CreateMessageDocument("namespace", "uri"); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk()); } TEST_F(IcingSearchEngineInitializationTest, InitializingAgainSavesNonPersistedData) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); DocumentProto document = CreateMessageDocument("namespace", "uri"); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_get_result_proto.mutable_document() = document; ASSERT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); } TEST_F(IcingSearchEngineInitializationTest, MaxIndexMergeSizeReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(std::numeric_limits::max()); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, NegativeMergeSizeReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(-1); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, ZeroMergeSizeReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(0); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); // One is fine, if a bit weird. It just means that the lite index will be // smaller and will request a merge any time content is added to it. options.set_index_merge_size(1); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); } TEST_F(IcingSearchEngineInitializationTest, NegativeMaxTokenLenReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_max_token_length(-1); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, ZeroMaxTokenLenReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_max_token_length(0); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, NegativeCompressionLevelReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_compression_level(-1); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, GreaterThanMaxCompressionLevelReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_compression_level(10); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_compression_level(0); IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); } TEST_F(IcingSearchEngineInitializationTest, ReinitializingWithDifferentCompressionLevelReturnsOk) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_compression_level(3); { IcingSearchEngine icing(options, GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); DocumentProto document = CreateMessageDocument("namespace", "uri"); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); } options.set_compression_level(9); { IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); } options.set_compression_level(0); { IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); } } TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) { auto mock_filesystem = std::make_unique(); // This fails DocumentStore::Create() ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_)) .WillByDefault(Return(false)); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoStatusIs(StatusProto::INTERNAL)); EXPECT_THAT(initialize_result_proto.status().message(), HasSubstr("Could not create directory")); } TEST_F(IcingSearchEngineInitializationTest, InitMarkerFilePreviousFailuresAtThreshold) { Filesystem filesystem; DocumentProto email1 = CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); email1.set_creation_timestamp_ms(10000); DocumentProto email2 = CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); email2.set_creation_timestamp_ms(10000); { // Create an index with a few documents. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoIsOk()); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(0)); ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); } // Write an init marker file with 5 previously failed attempts. std::string marker_filepath = GetTestBaseDir() + "/init_marker"; { ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); int network_init_attempts = GHostToNetworkL(5); // Write the updated number of attempts before we get started. ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, &network_init_attempts, sizeof(network_init_attempts))); ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); } { // Create the index again and verify that initialization succeeds and no // data is thrown out. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoIsOk()); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(5)); EXPECT_THAT( icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) .document(), EqualsProto(email1)); EXPECT_THAT( icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) .document(), EqualsProto(email2)); } // The successful init should have thrown out the marker file. ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); } TEST_F(IcingSearchEngineInitializationTest, InitMarkerFilePreviousFailuresBeyondThreshold) { Filesystem filesystem; DocumentProto email1 = CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); DocumentProto email2 = CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); { // Create an index with a few documents. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoIsOk()); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(0)); ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); } // Write an init marker file with 6 previously failed attempts. std::string marker_filepath = GetTestBaseDir() + "/init_marker"; { ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); int network_init_attempts = GHostToNetworkL(6); // Write the updated number of attempts before we get started. ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, &network_init_attempts, sizeof(network_init_attempts))); ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); } { // Create the index again and verify that initialization succeeds and all // data is thrown out. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(6)); EXPECT_THAT( icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::NOT_FOUND)); EXPECT_THAT( icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::NOT_FOUND)); } // The successful init should have thrown out the marker file. ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); } TEST_F(IcingSearchEngineInitializationTest, SuccessiveInitFailuresIncrementsInitMarker) { Filesystem filesystem; DocumentProto email1 = CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); DocumentProto email2 = CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); { // 1. Create an index with a few documents. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoIsOk()); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(0)); ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); } { // 2. Create an index that will encounter an IO failure when trying to // create the document log. IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); auto mock_filesystem = std::make_unique(); std::string document_log_filepath = icing_options.base_dir() + "/document_dir/document_log_v1"; ON_CALL(*mock_filesystem, GetFileSize(Matcher(Eq(document_log_filepath)))) .WillByDefault(Return(Filesystem::kBadFileSize)); TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); // Fail to initialize six times in a row. InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(0)); init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(1)); init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(2)); init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(3)); init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(4)); init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(5)); } { // 3. Create the index again and verify that initialization succeeds and all // data is thrown out. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), Eq(6)); EXPECT_THAT( icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::NOT_FOUND)); EXPECT_THAT( icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::NOT_FOUND)); } // The successful init should have thrown out the marker file. std::string marker_filepath = GetTestBaseDir() + "/init_marker"; ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); } TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) { SearchSpecProto search_spec; search_spec.set_query("message"); search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = CreateMessageDocument("namespace", "uri"); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_get_result_proto.mutable_document() = CreateMessageDocument("namespace", "uri"); { // Basic initialization/setup IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str())); // We should be able to recover from this and access all our previous data IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); // Checks that DocumentLog is still ok EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); // Checks that the term index is still ok so we can search over it SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); // Checks that the integer index is still ok so we can search over it SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto search_result_google::protobuf = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); // Checks that Schema is still since it'll be needed to validate the document EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); } TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) { { // Basic initialization/setup IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_get_result_proto.mutable_document() = CreateMessageDocument("namespace", "uri"); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to const std::string schema_file = absl_ports::StrCat(GetSchemaDir(), "/schema.pb"); const std::string corrupt_data = "1234"; EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(), corrupt_data.size())); IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INTERNAL)); } TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptDocumentLog) { { // Basic initialization/setup IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_get_result_proto.mutable_document() = CreateMessageDocument("namespace", "uri"); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to const std::string document_log_file = absl_ports::StrCat( GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); const std::string corrupt_data = "1234"; EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(), corrupt_data.data(), corrupt_data.size())); IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoStatusIs(StatusProto::INTERNAL)); } TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentSchemaStore) { DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); DocumentProto document2_with_additional_property = DocumentBuilder() .SetKey("namespace", "uri2") .SetSchema("Message") .AddStringProperty("additional", "content") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); IcingSearchEngineOptions options = GetDefaultIcingOptions(); { // Initializes folder and schema IcingSearchEngine icing(options, GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); SchemaProto schema = SchemaBuilder() .AddType( SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig()) // Add non-indexable property "additional" .AddProperty(PropertyConfigBuilder() .SetName("additional") .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document2_with_additional_property).status(), ProtoIsOk()); // Won't get us anything because "additional" isn't marked as an indexed // property in the schema SearchSpecProto search_spec; search_spec.set_query("additional:content"); search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to { // This schema will change the SchemaTypeIds from the previous schema_ // (since SchemaTypeIds are assigned based on order of the types, and this // new schema changes the ordering of previous types) SchemaProto new_schema; auto type = new_schema.add_types(); type->set_schema_type("Email"); // Switching a non-indexable property to indexable changes the SectionIds // (since SectionIds are assigned based on alphabetical order of indexed // sections, marking "additional" as an indexed property will push the // "body" and "indexableInteger" property to different SectionIds) *new_schema.add_types() = SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig()) .AddProperty( PropertyConfigBuilder() .SetName("additional") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL)) .Build(); // Write the marker file std::string marker_filepath = absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); ASSERT_TRUE(sfd.is_valid()); // Write the new schema FakeClock fake_clock; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); ICING_EXPECT_OK(schema_store->SetSchema( new_schema, /*ignore_errors_and_delete_documents=*/false, /*allow_circular_schema_definitions=*/false)); } // Will persist new schema IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); // We can insert a Email document since we kept the new schema DocumentProto email_document = DocumentBuilder() .SetKey("namespace", "email_uri") .SetSchema("Email") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_get_result_proto.mutable_document() = email_document; EXPECT_THAT(icing.Get("namespace", "email_uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); // Verify term search SearchSpecProto search_spec1; // The section restrict will ensure we are using the correct, updated // SectionId in the Index search_spec1.set_query("additional:content"); // Schema type filter will ensure we're using the correct, updated // SchemaTypeId in the DocumentStore search_spec1.add_schema_type_filters("Message"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto expected_search_result_proto1; expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto1.mutable_results()->Add()->mutable_document() = document2_with_additional_property; SearchResultProto search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto1)); // Verify numeric (integer) search SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec1.add_schema_type_filters("Message"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto expected_search_result_google::protobuf; expected_search_result_google::protobuf.mutable_status()->set_code(StatusProto::OK); *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() = document2_with_additional_property; *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() = document1; SearchResultProto search_result_google::protobuf = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_google::protobuf)); } TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentDocumentStore) { // Test the following scenario: document store is ahead of term, integer and // qualified id join index. IcingSearchEngine should be able to recover all // indices. Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. // - Still, we need to replay and reindex documents. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message1 = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", "message body one") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message2 = DocumentBuilder() .SetKey("namespace", "message/2") .SetSchema("Message") .AddStringProperty("body", "message body two") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); { // Initializes folder and schema, index one document TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk()); } // This should shut down IcingSearchEngine and persist anything it needs to { FakeClock fake_clock; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); // Puts message2 into DocumentStore but doesn't index it. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(), /*force_recovery_and_revalidate_documents=*/false, /*namespace_id_fingerprint=*/false, PortableFileBackedProtoLog< DocumentWrapper>::kDeflateCompressionLevel, /*initialize_stats=*/nullptr)); std::unique_ptr document_store = std::move(create_result.document_store); ICING_EXPECT_OK(document_store->Put(message2)); } // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory // "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); EXPECT_THAT(initialize_result.status(), ProtoIsOk()); // Index Restoration should be triggered here and document2 should be // indexed. EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_get_result_proto.mutable_document() = message1; // DocumentStore kept the additional document EXPECT_THAT(icing.Get("namespace", "message/1", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); *expected_get_result_proto.mutable_document() = message2; EXPECT_THAT(icing.Get("namespace", "message/2", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = message2; *expected_search_result_proto.mutable_results()->Add()->mutable_document() = message1; // We indexed the additional document in all indices. // Verify term search SearchSpecProto search_spec1; search_spec1.set_query("message"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); // Verify numeric (integer) search SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto search_result_google::protobuf = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); // Verify join search: join a query for `name:person` with a child query for // `body:message` based on the child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:message"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto expected_join_search_result_proto; expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto::ResultProto* result_proto = expected_join_search_result_proto.mutable_results()->Add(); *result_proto->mutable_document() = person; *result_proto->mutable_joined_results()->Add()->mutable_document() = message2; *result_proto->mutable_joined_results()->Add()->mutable_document() = message1; SearchResultProto search_result_proto3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( expected_join_search_result_proto)); } TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) { // Test the following scenario: term index is corrupted (e.g. checksum doesn't // match). IcingSearchEngine should be able to recover term index. Several // additional behaviors are also tested: // - Index directory handling: // - Should discard the entire term index directory and start it from // scratch. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect since we start it // from scratch. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); SearchSpecProto search_spec; search_spec.set_query("body:message"); search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = message; { // Initializes folder and schema, index one document TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Manually corrupt term index { const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb"; ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str())); ASSERT_TRUE(fd.is_valid()); ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); } // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should be discarded once. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(1); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory "*/integer_index_dir/*" // should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); EXPECT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Check that our index is ok by searching over the restored index SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) { // Test the following scenario: integer index is corrupted (e.g. checksum // doesn't match). IcingSearchEngine should be able to recover integer index. // Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Should discard the entire integer index directory and start it from // scratch. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded, since we start it from scratch. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); SearchSpecProto search_spec; search_spec.set_query("indexableInteger == 123"); search_spec.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = message; { // Initializes folder and schema, index one document TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Manually corrupt integer index { const std::string integer_index_metadata_file = GetIntegerIndexDir() + "/integer_index.m"; ScopedFd fd( filesystem()->OpenForWrite(integer_index_metadata_file.c_str())); ASSERT_TRUE(fd.is_valid()); ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); } // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should be discarded once, and Clear() // should never be called (i.e. storage sub directory "*/integer_index_dir/*" // should never be discarded) since we start it from scratch. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(1); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); EXPECT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Check that our index is ok by searching over the restored index SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptQualifiedIdJoinIndex) { // Test the following scenario: qualified id join index is corrupted (e.g. // checksum doesn't match). IcingSearchEngine should be able to recover // qualified id join index. Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Should discard the entire qualified id join index directory and start // it from scratch. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded, since we start // it from scratch. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // Prepare join search spec to join a query for `name:person` with a child // query for `body:message` based on the child's `senderQualifiedId` field. SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec.set_query("name:person"); JoinSpecProto* join_spec = search_spec.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:message"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec = ResultSpecProto::default_instance(); result_spec.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto::ResultProto* result_proto = expected_search_result_proto.mutable_results()->Add(); *result_proto->mutable_document() = person; *result_proto->mutable_joined_results()->Add()->mutable_document() = message; { // Initializes folder and schema, index one document TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Manually corrupt qualified id join index { const std::string qualified_id_join_index_metadata_file = GetQualifiedIdJoinIndexDir() + "/metadata"; ScopedFd fd(filesystem()->OpenForWrite( qualified_id_join_index_metadata_file.c_str())); ASSERT_TRUE(fd.is_valid()); ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); } // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory "*/integer_index_dir/*" // should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should be discarded once, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(1); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); EXPECT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::IO_ERROR)); // Check that our index is ok by searching over the restored index SearchResultProto search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) { // Test the following scenario: losing the entire term index. Since we need // flash index magic to determine the version, in this test we will throw out // the entire term index and re-initialize an empty one, to bypass // undetermined version state change and correctly trigger "lose term index" // scenario. // IcingSearchEngine should be able to recover term index. Several additional // behaviors are also tested: // - Index directory handling: // - Term index directory should not be discarded (but instead just being // rebuilt by replaying all docs). // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect since it is empty. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with 3 message documents. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Delete and re-initialize an empty term index to trigger // RestoreIndexIfNeeded. { std::string idx_subdir = GetIndexDir() + "/idx"; ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100), filesystem(), icing_filesystem())); ICING_ASSERT_OK(index->PersistToDisk()); } // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded since we've already // lost it. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory // "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // All documents should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) { // Test the following scenario: losing the entire integer index directory. // IcingSearchEngine should be able to recover integer index. Several // additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should not be discarded since we've already // lost it. Start it from scratch. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded, since we start it from scratch. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with 3 message documents. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Delete the integer index file to trigger RestoreIndexIfNeeded. std::string integer_index_dir = GetIntegerIndexDir(); filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str()); // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded since we've // already lost it, and Clear() should never be called (i.e. storage sub // directory "*/integer_index_dir/*" should never be discarded) since we // start it from scratch. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // All documents should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseQualifiedIdJoinIndex) { // Test the following scenario: losing the entire qualified id join index // directory. IcingSearchEngine should be able to recover qualified id join // index. Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should not be discarded since we've // already lost it. Start it from scratch. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded, since we start // it from scratch. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with 3 message documents. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Delete the qualified id join index file to trigger RestoreIndexIfNeeded. std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir(); filesystem()->DeleteDirectoryRecursively(qualified_id_join_index_dir.c_str()); // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded since we've // already lost it, and Clear() should never be called (i.e. storage sub // directory "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded) // since we start it from scratch. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // All documents should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateLiteIndexWithoutReindexing) { // Test the following scenario: term lite index is *completely* ahead of // document store. IcingSearchEngine should be able to recover term index. // Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index should take effect and throw out the // entire lite index. This should be sufficient to make term index // consistent with document store, so reindexing should not take place. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with a LiteIndex that will only allow a person and a // message document before needing a merge. { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(person.ByteSizeLong() + message.ByteSizeLong()); TestIcingSearchEngine icing(options, std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); // Add two message documents. These should get merged into the main index. EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Manually add some data into term lite index and increment // last_added_document_id, but don't merge into the main index. This will // cause mismatched last_added_document_id with term index. // - Document store: [0, 1, 2] // - Term index // - Main index: [0, 1, 2] // - Lite index: [3] // - Integer index: [0, 1, 2] // - Qualified id join index: [0, 1, 2] { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), filesystem(), icing_filesystem())); DocumentId original_last_added_doc_id = index->last_added_document_id(); index->set_last_added_document_id(original_last_added_doc_id + 1); Index::Editor editor = index->Edit(original_last_added_doc_id + 1, /*section_id=*/0, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); ICING_ASSERT_OK(editor.BufferTerm("foo")); ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); } // 3. Create the index again. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. since we only call // TruncateTo for term index. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory // "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(message.ByteSizeLong()); TestIcingSearchEngine icing(options, std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); // Since truncating lite index is sufficient to make term index consistent // with document store, replaying documents or reindex shouldn't take place. EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // Only the documents that were in the main index should be retrievable. ASSERT_THAT(results1.results(), SizeIs(2)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(2)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/1")); } // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to // verify the correctness of term index restoration. Instead, we have to check // hits for "foo" should not be found in term index. { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0, kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(doc_hit_info_iter->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateLiteIndexWithReindexing) { // Test the following scenario: term lite index is *partially* ahead of // document store. IcingSearchEngine should be able to recover term index. // Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index should take effect and throw out the // entire lite index. However, some valid data in term lite index were // discarded together, so reindexing should still take place to recover // them after truncating. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with a LiteIndex that will only allow a person and a // message document before needing a merge. { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(message.ByteSizeLong()); TestIcingSearchEngine icing(options, std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); // Add two message documents. These should get merged into the main index. EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); // Add one document. This one should get remain in the lite index. message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Manually add some data into term lite index and increment // last_added_document_id, but don't merge into the main index. This will // cause mismatched last_added_document_id with term index. // - Document store: [0, 1, 2, 3] // - Term index // - Main index: [0, 1, 2] // - Lite index: [3, 4] // - Integer index: [0, 1, 2, 3] // - Qualified id join index: [0, 1, 2, 3] { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), filesystem(), icing_filesystem())); DocumentId original_last_added_doc_id = index->last_added_document_id(); index->set_last_added_document_id(original_last_added_doc_id + 1); Index::Editor editor = index->Edit(original_last_added_doc_id + 1, /*section_id=*/0, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); ICING_ASSERT_OK(editor.BufferTerm("foo")); ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); } // 3. Create the index again. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. since we only call // TruncateTo for term index. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory // "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_index_merge_size(message.ByteSizeLong()); TestIcingSearchEngine icing(options, std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); // Truncating lite index not only deletes data ahead document store, but // also deletes valid data. Therefore, we still have to replay documents and // reindex. EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // Only the documents that were in the main index should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); } // 4. Since document 4 doesn't exist, testing query = "foo" is not enough to // verify the correctness of term index restoration. Instead, we have to check // hits for "foo" should not be found in term index. { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0, kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(doc_hit_info_iter->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateMainIndexWithoutReindexing) { // Test the following scenario: term main index is *completely* ahead of // document store. IcingSearchEngine should be able to recover term index. // Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index should take effect and throw out the // entire lite and main index. This should be sufficient to make term // index consistent with document store (in this case, document store is // empty as well), so reindexing should not take place. // - "Clear()" should be called for integer index. It is a special case when // document store has no document. Since there is no integer index storage // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be // discarded. // - "Clear()" should be called for qualified id join index. It is a special // case when document store has no document. // 1. Create an index with no document. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); } // 2. Manually add some data into term lite index and increment // last_added_document_id. Merge some of them into the main index and keep // others in the lite index. This will cause mismatched document id with // document store. // - Document store: [] // - Term index // - Main index: [0] // - Lite index: [1] // - Integer index: [] // - Qualified id join index: [] { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create( // index merge size is not important here because we will manually // invoke merge below. Index::Options(GetIndexDir(), /*index_merge_size=*/100), filesystem(), icing_filesystem())); // Add hits for document 0 and merge. ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId); index->set_last_added_document_id(0); Index::Editor editor = index->Edit(/*document_id=*/0, /*section_id=*/0, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); ICING_ASSERT_OK(editor.BufferTerm("foo")); ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); ICING_ASSERT_OK(index->Merge()); // Add hits for document 1 and don't merge. index->set_last_added_document_id(1); editor = index->Edit(/*document_id=*/1, /*section_id=*/0, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); ICING_ASSERT_OK(editor.BufferTerm("bar")); ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); } // 3. Create the index again. This should throw out the lite and main index. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. since we only call // TruncateTo for term index. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded. Even though // Clear() was called, it shouldn't take effect since there is no storage // sub directory ("*/integer_index_dir/*") and nothing will be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded. // Clear() was called and should discard and reinitialize the underlying // mapper. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(AtLeast(1)); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); // Since truncating main index is sufficient to make term index consistent // with document store, replaying documents or reindexing shouldn't take // place. EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); } // 4. Since document 0, 1 don't exist, testing queries = "foo", "bar" are not // enough to verify the correctness of term index restoration. Instead, we // have to check hits for "foo", "bar" should not be found in term index. { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100), filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0, kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(doc_hit_info_iter->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); ICING_ASSERT_OK_AND_ASSIGN( doc_hit_info_iter, index->GetIterator("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0, kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(doc_hit_info_iter->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateMainIndexWithReindexing) { // Test the following scenario: term main index is *partially* ahead of // document store. IcingSearchEngine should be able to recover term index. // Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - In RestoreIndexIfNecessary(): // - "TruncateTo()" for term index should take effect and throw out the // entire lite and main index. However, some valid data in term main index // were discarded together, so reindexing should still take place to // recover them after truncating. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with 3 message documents. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Manually add some data into term lite index and increment // last_added_document_id. Merge some of them into the main index and keep // others in the lite index. This will cause mismatched document id with // document store. // - Document store: [0, 1, 2, 3] // - Term index // - Main index: [0, 1, 2, 3, 4] // - Lite index: [5] // - Integer index: [0, 1, 2, 3] // - Qualified id join index: [0, 1, 2, 3] { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), filesystem(), icing_filesystem())); // Add hits for document 4 and merge. DocumentId original_last_added_doc_id = index->last_added_document_id(); index->set_last_added_document_id(original_last_added_doc_id + 1); Index::Editor editor = index->Edit(original_last_added_doc_id + 1, /*section_id=*/0, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); ICING_ASSERT_OK(editor.BufferTerm("foo")); ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); ICING_ASSERT_OK(index->Merge()); // Add hits for document 5 and don't merge. index->set_last_added_document_id(original_last_added_doc_id + 2); editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); ICING_ASSERT_OK(editor.BufferTerm("bar")); ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); } // 3. Create the index again. This should throw out the lite and main index // and trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. since we only call // TruncateTo for term index. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory // "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); // Truncating main index not only deletes data ahead document store, but // also deletes valid data. Therefore, we still have to replay documents and // reindex. EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // Only the first document should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); } // 4. Since document 4, 5 don't exist, testing queries = "foo", "bar" are not // enough to verify the correctness of term index restoration. Instead, we // have to check hits for "foo", "bar" should not be found in term index. { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100), filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0, kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(doc_hit_info_iter->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); ICING_ASSERT_OK_AND_ASSIGN( doc_hit_info_iter, index->GetIterator("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0, kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); EXPECT_THAT(doc_hit_info_iter->Advance(), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateIntegerIndexWithoutReindexing) { // Test the following scenario: integer index is *completely* ahead of // document store. IcingSearchEngine should be able to recover integer index. // Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" should be called for integer index and throw out all integer // index storages, i.e. all storage sub directories (path_expr = // "*/integer_index_dir/*") should be discarded. This should be sufficient // to make integer index consistent with document store (in this case, // document store is empty as well), so reindexing should not take place. // - "Clear()" should be called for qualified id join index. It is a special // case when document store has no document. // 1. Create an index with no document. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); } // 2. Manually add some data into integer index and increment // last_added_document_id. This will cause mismatched document id with // document store. // - Document store: [] // - Term index: [] // - Integer index: [0] // - Qualified id join index: [] { Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr integer_index, IntegerIndex::Create(filesystem, GetIntegerIndexDir(), /*pre_mapping_fbv=*/false)); // Add hits for document 0. ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId); integer_index->set_last_added_document_id(0); std::unique_ptr::Editor> editor = integer_index->Edit( /*property_path=*/"indexableInteger", /*document_id=*/0, /*section_id=*/0); ICING_ASSERT_OK(editor->BufferKey(123)); ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys()); } // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); // Clear() should be called to truncate integer index and thus storage sub // directory (path_expr = "*/integer_index_dir/*") should be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(1); // Ensure qualified id join index directory should never be discarded. // Clear() was called and should discard and reinitialize the underlying // mapper. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(AtLeast(1)); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Since truncating integer index is sufficient to make it consistent with // document store, replaying documents or reindexing shouldn't take place. EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify that numeric query safely wiped out the pre-existing hit for // 'indexableInteger' == 123. Add a new document without that value for // 'indexableInteger' that will take docid=0. If the integer index was not // rebuilt correctly, then it will still have the previously added hit for // 'indexableInteger' == 123 for docid 0 and incorrectly return this new // doc in a query. DocumentProto another_message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 456) .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk()); // Verify integer index works normally SearchSpecProto search_spec; search_spec.set_query("indexableInteger == 123"); search_spec.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results = icing.Search(search_spec, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results.results(), IsEmpty()); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateIntegerIndexWithReindexing) { // Test the following scenario: integer index is *partially* ahead of document // store. IcingSearchEngine should be able to recover integer index. Several // additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" should be called for integer index and throw out all integer // index storages, i.e. all storage sub directories (path_expr = // "*/integer_index_dir/*") should be discarded. However, some valid data // in integer index were discarded together, so reindexing should still // take place to recover them after clearing. // - "Clear()" shouldn't be called for qualified id join index, i.e. no // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_OPTIONAL)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with message 3 documents. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } // 2. Manually add some data into integer index and increment // last_added_document_id. This will cause mismatched document id with // document store. // - Document store: [0, 1, 2, 3] // - Term index: [0, 1, 2, 3] // - Integer index: [0, 1, 2, 3, 4] // - Qualified id join index: [0, 1, 2, 3] { Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr integer_index, IntegerIndex::Create(filesystem, GetIntegerIndexDir(), /*pre_mapping_fbv=*/false)); // Add hits for document 4. DocumentId original_last_added_doc_id = integer_index->last_added_document_id(); integer_index->set_last_added_document_id(original_last_added_doc_id + 1); std::unique_ptr::Editor> editor = integer_index->Edit( /*property_path=*/"indexableInteger", /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0); ICING_ASSERT_OK(editor->BufferKey(456)); ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys()); } // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); // Clear() should be called to truncate integer index and thus storage sub // directory (path_expr = "*/integer_index_dir/*") should be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(1); // Ensure qualified id join index directory should never be discarded, and // Clear() should never be called (i.e. storage sub directory // "*/qualified_id_join_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(0); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // All documents should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); // Verify that numeric index safely wiped out the pre-existing hit for // 'indexableInteger' == 456. Add a new document without that value for // 'indexableInteger' that will take docid=0. If the integer index was not // rebuilt correctly, then it will still have the previously added hit for // 'indexableInteger' == 456 for docid 0 and incorrectly return this new // doc in a query. DocumentProto another_message = DocumentBuilder() .SetKey("namespace", "message/4") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk()); // Verify integer index works normally SearchSpecProto search_spec; search_spec.set_query("indexableInteger == 456"); search_spec.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results = icing.Search(search_spec, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results.results(), IsEmpty()); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) { // Test the following scenario: qualified id join index is *completely* ahead // of document store. IcingSearchEngine should be able to recover qualified id // join index. Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" should be called for integer index. It is a special case when // document store has no document. Since there is no integer index storage // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be // discarded. // - "Clear()" should be called for qualified id join index and throw out // all data, i.e. discarding the underlying mapper (path_expr = // "*/qualified_id_join_index_dir/*") and reinitialize. This should be // sufficient to make qualified id join index consistent with document // store (in this case, document store is empty as well), so reindexing // should not take place. // 1. Create an index with no document. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); } // 2. Manually add some data into integer index and increment // last_added_document_id. This will cause mismatched document id with // document store. // - Document store: [] // - Term index: [] // - Integer index: [] // - Qualified id join index: [0] { Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr qualified_id_join_index, QualifiedIdTypeJoinableIndex::Create( filesystem, GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); // Add data for document 0. ASSERT_THAT(qualified_id_join_index->last_added_document_id(), kInvalidDocumentId); qualified_id_join_index->set_last_added_document_id(0); ICING_ASSERT_OK(qualified_id_join_index->Put( DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0), /*ref_qualified_id_str=*/"namespace#person")); } // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded. Even though // Clear() was called, it shouldn't take effect since there is no storage // sub directory ("*/integer_index_dir/*") and nothing will be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); // Clear() should be called to truncate qualified id join index and thus // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(AtLeast(1)); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); // Since truncating qualified id join index is sufficient to make it // consistent with document store, replaying documents or reindexing // shouldn't take place. EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); } // 4. Since document 0 doesn't exist, testing join query is not enough to // verify the correctness of qualified id join index restoration. Instead, we // have to check the previously added data should not be found in qualified id // join index. { Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr qualified_id_join_index, QualifiedIdTypeJoinableIndex::Create( filesystem, GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); EXPECT_THAT(qualified_id_join_index->Get( DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) { // Test the following scenario: qualified id join index is *partially* ahead // of document store. IcingSearchEngine should be able to recover qualified id // join index. Several additional behaviors are also tested: // - Index directory handling: // - Term index directory should be unaffected. // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: // - "TruncateTo()" for term index shouldn't take effect. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. // - "Clear()" should be called for qualified id join index and throw out // all data, i.e. discarding the underlying mapper (path_expr = // "*/qualified_id_join_index_dir/*") and reinitialize. However, some // valid data in qualified id join index were discarded together, so // reindexing should still take place to recover them after clearing. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); // 1. Create an index with message 3 documents. { TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/2").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); message = DocumentBuilder(message).SetUri("message/3").Build(); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } DocJoinInfo additional_data_key; // 2. Manually add some data into qualified id join index and increment // last_added_document_id. This will cause mismatched document id with // document store. // - Document store: [0, 1, 2, 3] // - Term index: [0, 1, 2, 3] // - Integer index: [0, 1, 2, 3] // - Qualified id join index: [0, 1, 2, 3, 4] { Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr qualified_id_join_index, QualifiedIdTypeJoinableIndex::Create( filesystem, GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); // Add data for document 4. DocumentId original_last_added_doc_id = qualified_id_join_index->last_added_document_id(); qualified_id_join_index->set_last_added_document_id( original_last_added_doc_id + 1); additional_data_key = DocJoinInfo(/*document_id=*/original_last_added_doc_id + 1, /*joinable_property_id=*/0); ICING_ASSERT_OK(qualified_id_join_index->Put( additional_data_key, /*ref_qualified_id_str=*/"namespace#person")); } // 3. Create the index again. This should trigger index restoration. { // Mock filesystem to observe and check the behavior of all indices. auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) .WillRepeatedly(DoDefault()); // Ensure term index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/index_dir"))) .Times(0); // Ensure integer index directory should never be discarded, and Clear() // should never be called (i.e. storage sub directory // "*/integer_index_dir/*" should never be discarded). EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) .Times(0); EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) .Times(0); // Ensure qualified id join index directory should never be discarded. EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( EndsWith("/qualified_id_join_index_dir"))) .Times(0); // Clear() should be called to truncate qualified id join index and thus // underlying storage sub directory (path_expr = // "*/qualified_id_join_index_dir/*") should be discarded. EXPECT_CALL( *mock_filesystem, DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) .Times(AtLeast(1)); TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); ASSERT_THAT(initialize_result.status(), ProtoIsOk()); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); // Verify term index works normally SearchSpecProto search_spec1; search_spec1.set_query("body:consectetur"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto results1 = icing.Search(search_spec1, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(results1.status(), ProtoIsOk()); EXPECT_THAT(results1.next_page_token(), Eq(0)); // All documents should be retrievable. ASSERT_THAT(results1.results(), SizeIs(3)); EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); // Verify integer index works normally SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto results2 = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); ASSERT_THAT(results2.results(), SizeIs(3)); EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); // Verify qualified id join index works normally: join a query for // `name:person` with a child query for `body:consectetur` based on the // child's `senderQualifiedId` field. // Add document 4 without "senderQualifiedId". If joinable index is not // rebuilt correctly, then it will still have the previously added // senderQualifiedId for document 4 and include document 4 incorrectly in // the right side. DocumentProto another_message = DocumentBuilder() .SetKey("namespace", "message/4") .SetSchema("Message") .AddStringProperty("body", kIpsumText) .AddInt64Property("indexableInteger", 123) .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk()); SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:consectetur"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto results3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), Eq("message/3")); EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), Eq("message/2")); EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), Eq("message/1")); } } TEST_F(IcingSearchEngineInitializationTest, DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) { // 1. Create an index with a single document in it that has no indexed // content. { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); // Set a schema for a single type that has no indexed properties. SchemaProto schema = SchemaBuilder() .AddType( SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("unindexedField") .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("unindexedInteger") .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); // Add a document that contains no indexed properties. DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("unindexedField", "Don't you dare search over this!") .AddInt64Property("unindexedInteger", -123) .Build(); EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); } // 2. Create the index again. This should NOT trigger a recovery of any kind. { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); EXPECT_THAT(init_result.status(), ProtoIsOk()); EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( init_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); } } TEST_F(IcingSearchEngineInitializationTest, DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) { // 1. Create an index with a single document in it that has no valid indexed // tokens in its content. { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); SchemaProto schema = SchemaBuilder() .AddType( SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_OPTIONAL)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); // Set a schema for a single type that has no term, integer, join indexed // contents. ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); // Add a document that contains: // - No valid indexed string content - just punctuation // - No integer content - since it is an optional property // - No qualified id content - since it is an optional property DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "?...!") .Build(); EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); } // 2. Create the index again. This should NOT trigger a recovery of any kind. { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); EXPECT_THAT(init_result.status(), ProtoIsOk()); EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( init_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogFunctionLatency) { auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10)); } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogNumberOfDocuments) { DocumentProto document1 = DocumentBuilder() .SetKey("icing", "fake_type/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); DocumentProto document2 = DocumentBuilder() .SetKey("icing", "fake_type/2") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 456) .Build(); { // Initialize and put a document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), Eq(0)); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); } { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), Eq(1)); // Put another document. ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); } { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), Eq(2)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) { // Even though the fake timer will return 10, all the latency numbers related // to recovery / restoration should be 0 during the first-time initialization. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCausePartialDataLoss) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); { // Initialize and put a document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); } { // Append a non-checksummed document. This will mess up the checksum of the // proto log, forcing it to rewind and later return a DATA_LOSS error. const std::string serialized_document = document.SerializeAsString(); const std::string document_log_file = absl_ports::StrCat( GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str()); filesystem()->PWrite(document_log_file.c_str(), file_size, serialized_document.data(), serialized_document.size()); } { // Document store will rewind to previous checkpoint. The cause should be // DATA_LOSS and the data status should be PARTIAL_LOSS. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::PARTIAL_LOSS)); // Since document store rewinds to previous checkpoint, last stored doc id // will be consistent with last added document ids in term/integer indices, // so there will be no index restoration. EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(0)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseCompleteDataLoss) { DocumentProto document1 = DocumentBuilder() .SetKey("icing", "fake_type/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); const std::string document_log_file = absl_ports::StrCat( GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t corruptible_offset; { // Initialize and put a document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); // There's some space at the beginning of the file (e.g. header, kmagic, // etc) that is necessary to initialize the FileBackedProtoLog. We can't // corrupt that region, so we need to figure out the offset at which // documents will be written to - which is the file size after // initialization. corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); } { // "Corrupt" the content written in the log. Make the corrupt document // smaller than our original one so we don't accidentally write past our // file. DocumentProto document = DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); std::string serialized_document = document.SerializeAsString(); ASSERT_TRUE(filesystem()->PWrite( document_log_file.c_str(), corruptible_offset, serialized_document.data(), serialized_document.size())); PortableFileBackedProtoLog::Header header = ReadDocumentLogHeader(*filesystem(), document_log_file); // Set dirty bit to true to reflect that something changed in the log. header.SetDirtyFlag(true); header.SetHeaderChecksum(header.CalculateHeaderChecksum()); WriteDocumentLogHeader(*filesystem(), document_log_file, header); } { // Document store will completely rewind. The cause should be DATA_LOSS and // the data status should be COMPLETE_LOSS. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::COMPLETE_LOSS)); // The complete rewind of ground truth causes us to clear the index, but // that's not considered a restoration. EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(0)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); { // Initialize and put a document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); } { // Delete and re-initialize an empty index file to trigger // RestoreIndexIfNeeded. std::string idx_subdir = GetIndexDir() + "/idx"; ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100), filesystem(), icing_filesystem())); ICING_ASSERT_OK(index->PersistToDisk()); } { // Index is empty but ground truth is not. Index should be restored due to // the inconsistency. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F( IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); { // Initialize and put a document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); } { // Delete the integer index file to trigger RestoreIndexIfNeeded. std::string integer_index_dir = GetIntegerIndexDir(); filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str()); } { // Index is empty but ground truth is not. Index should be restored due to // the inconsistency. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F( IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); { // Initialize and put documents. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } { // Delete the qualified id join index file to trigger RestoreIndexIfNeeded. std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir(); filesystem()->DeleteDirectoryRecursively( qualified_id_join_index_dir.c_str()); } { // Index is empty but ground truth is not. Index should be restored due to // the inconsistency. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); IcingSearchEngineOptions options = GetDefaultIcingOptions(); { // Initialize and put one document. IcingSearchEngine icing(options, GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); } { // Simulate a schema change where power is lost after the schema is written. SchemaProto new_schema = SchemaBuilder() .AddType( SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig()) .AddProperty(PropertyConfigBuilder() .SetName("subject") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); // Write the marker file std::string marker_filepath = absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); ASSERT_TRUE(sfd.is_valid()); // Write the new schema FakeClock fake_clock; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); ICING_EXPECT_OK(schema_store->SetSchema( new_schema, /*ignore_errors_and_delete_documents=*/false, /*allow_circular_schema_definitions=*/false)); } { // Both document store and index should be recovered from checksum mismatch. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } { // No recovery should be needed. auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(0)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseIndexIOError) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); { // Initialize and put one document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); } std::string lite_index_buffer_file_path = absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb"); auto mock_icing_filesystem = std::make_unique(); EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(_)) .WillRepeatedly(DoDefault()); // This fails Index::Create() once. EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(Eq(lite_index_buffer_file_path))) .WillOnce(Return(-1)) .WillRepeatedly(DoDefault()); auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::move(mock_icing_filesystem), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT( initialize_result_proto.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseIntegerIndexIOError) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); { // Initialize and put one document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); } std::string integer_index_metadata_file = absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m"); auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, OpenForWrite(_)).WillRepeatedly(DoDefault()); // This fails IntegerIndex::Create() once. EXPECT_CALL(*mock_filesystem, OpenForWrite(Eq(integer_index_metadata_file))) .WillOnce(Return(-1)) .WillRepeatedly(DoDefault()); auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT( initialize_result_proto.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person = DocumentBuilder() .SetKey("namespace", "person") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message/1") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); { // Initialize and put documents. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(person).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(message).status(), ProtoIsOk()); } std::string qualified_id_join_index_metadata_file = absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata"); auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, PRead(A(), _, _, _)) .WillRepeatedly(DoDefault()); // This fails QualifiedIdTypeJoinableIndex::Create() once. EXPECT_CALL( *mock_filesystem, PRead(Matcher(Eq(qualified_id_join_index_metadata_file)), _, _, _)) .WillOnce(Return(false)) .WillRepeatedly(DoDefault()); auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT( initialize_result_proto.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseDocStoreIOError) { DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/0") .SetSchema("Message") .AddStringProperty("body", "message body") .AddInt64Property("indexableInteger", 123) .Build(); { // Initialize and put one document. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); } std::string document_store_header_file_path = absl_ports::StrCat(GetDocumentDir(), "/document_store_header"); auto mock_filesystem = std::make_unique(); EXPECT_CALL(*mock_filesystem, Read(A(), _, _)) .WillRepeatedly(DoDefault()); // This fails DocumentStore::InitializeDerivedFiles() once. EXPECT_CALL( *mock_filesystem, Read(Matcher(Eq(document_store_header_file_path)), _, _)) .WillOnce(Return(false)) .WillRepeatedly(DoDefault()); auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(10)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(0)); } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogRecoveryCauseSchemaStoreIOError) { { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); } { // Delete the schema store type mapper to trigger an I/O error. std::string schema_store_header_file_path = GetSchemaDir() + "/schema_type_mapper"; ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively( schema_store_header_file_path.c_str())); } { auto fake_clock = std::make_unique(); fake_clock->SetTimerElapsedMilliseconds(10); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_cause(), Eq(InitializeStatsProto::IO_ERROR)); EXPECT_THAT(initialize_result_proto.initialize_stats() .schema_store_recovery_latency_ms(), Eq(10)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .document_store_recovery_latency_ms(), Eq(0)); EXPECT_THAT( initialize_result_proto.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT( initialize_result_proto.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .integer_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(initialize_result_proto.initialize_stats() .index_restoration_latency_ms(), Eq(0)); } } TEST_F(IcingSearchEngineInitializationTest, InitializeShouldLogNumberOfSchemaTypes) { { // Initialize an empty storage. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); // There should be 0 schema types. EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), Eq(0)); // Set a schema with one type config. ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); } { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); // There should be 1 schema type. EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), Eq(1)); // Create and set a schema with two type configs: Email and Message. SchemaProto schema = CreateEmailSchema(); *schema.add_types() = CreateMessageSchemaTypeConfig(); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); } { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); InitializeResultProto initialize_result_proto = icing.Initialize(); EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), Eq(2)); } } class IcingSearchEngineInitializationVersionChangeTest : public IcingSearchEngineInitializationTest, public ::testing::WithParamInterface {}; TEST_P(IcingSearchEngineInitializationVersionChangeTest, RecoverFromVersionChange) { // TODO(b/280697513): test backup schema migration // Test the following scenario: version change. All derived data should be // rebuilt. We test this by manually adding some invalid derived data and // verifying they're removed due to rebuild. SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( PropertyConfigBuilder() .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Message") .AddProperty(PropertyConfigBuilder() .SetName("body") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("indexableInteger") .SetDataTypeInt64(NUMERIC_MATCH_RANGE) .SetCardinality(CARDINALITY_REQUIRED)) .AddProperty(PropertyConfigBuilder() .SetName("senderQualifiedId") .SetDataTypeJoinableString( JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); DocumentProto person1 = DocumentBuilder() .SetKey("namespace", "person/1") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto person2 = DocumentBuilder() .SetKey("namespace", "person/2") .SetSchema("Person") .AddStringProperty("name", "person") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); DocumentProto message = DocumentBuilder() .SetKey("namespace", "message") .SetSchema("Message") .AddStringProperty("body", "correct message") .AddInt64Property("indexableInteger", 123) .AddStringProperty("senderQualifiedId", "namespace#person/1") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); { // Initializes folder and schema, index person1 and person2 TestIcingSearchEngine icing( GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk()); } // This should shut down IcingSearchEngine and persist anything it needs to { // Manually: // - Put message into DocumentStore // - But add some incorrect data for message into 3 indices // - Change version file // // These will make sure last_added_document_id is consistent with // last_stored_document_id, so if Icing didn't handle version change // correctly, then the index won't be rebuilt. FakeClock fake_clock; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); // Put message into DocumentStore ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(), /*force_recovery_and_revalidate_documents=*/false, /*namespace_id_fingerprint=*/false, PortableFileBackedProtoLog< DocumentWrapper>::kDeflateCompressionLevel, /*initialize_stats=*/nullptr)); std::unique_ptr document_store = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message)); // Index doc_id with incorrect data Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index, Index::Create(options, filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr integer_index, IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(), /*pre_mapping_fbv=*/false)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr qualified_id_join_index, QualifiedIdTypeJoinableIndex::Create( *filesystem(), GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr string_section_indexing_handler, StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(), index.get())); ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr integer_section_indexing_handler, IntegerSectionIndexingHandler::Create( &fake_clock, integer_index.get())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr qualified_id_joinable_property_indexing_handler, QualifiedIdJoinIndexingHandler::Create(&fake_clock, qualified_id_join_index.get())); std::vector> handlers; handlers.push_back(std::move(string_section_indexing_handler)); handlers.push_back(std::move(integer_section_indexing_handler)); handlers.push_back( std::move(qualified_id_joinable_property_indexing_handler)); IndexProcessor index_processor(std::move(handlers), &fake_clock); DocumentProto incorrect_message = DocumentBuilder() .SetKey("namespace", "message") .SetSchema("Message") .AddStringProperty("body", "wrong message") .AddInt64Property("indexableInteger", 456) .AddStringProperty("senderQualifiedId", "namespace#person/2") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); ICING_ASSERT_OK_AND_ASSIGN( TokenizedDocument tokenized_document, TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(), std::move(incorrect_message))); ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id)); // Change existing data's version file const version_util::VersionInfo& existing_version_info = GetParam(); ICING_ASSERT_OK(version_util::WriteVersion( *filesystem(), GetVersionFilename(), existing_version_info)); } // Mock filesystem to observe and check the behavior of all indices. TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::make_unique(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); EXPECT_THAT(initialize_result.status(), ProtoIsOk()); // Index Restoration should be triggered here. Incorrect data should be // deleted and correct data of message should be indexed. EXPECT_THAT( initialize_result.initialize_stats().document_store_recovery_cause(), Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), Eq(InitializeStatsProto::VERSION_CHANGED)); // Manually check version file ICING_ASSERT_OK_AND_ASSIGN( version_util::VersionInfo version_info_after_init, version_util::ReadVersion(*filesystem(), GetVersionFilename(), GetIndexDir())); EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion)); EXPECT_THAT(version_info_after_init.max_version, Eq(std::max(version_util::kVersion, GetParam().max_version))); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = message; // Verify term search SearchSpecProto search_spec1; search_spec1.set_query("body:correct"); search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); // Verify numeric (integer) search SearchSpecProto search_spec2; search_spec2.set_query("indexableInteger == 123"); search_spec2.set_search_type( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); SearchResultProto search_result_google::protobuf = icing.Search(search_spec2, ScoringSpecProto::default_instance(), ResultSpecProto::default_instance()); EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); // Verify join search: join a query for `name:person` with a child query for // `body:message` based on the child's `senderQualifiedId` field. SearchSpecProto search_spec3; search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); join_spec->set_aggregation_scoring_strategy( JoinSpecProto::AggregationScoringStrategy::COUNT); JoinSpecProto::NestedSpecProto* nested_spec = join_spec->mutable_nested_spec(); SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); nested_search_spec->set_query("body:message"); *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); result_spec3.set_max_joined_children_per_parent_to_return( std::numeric_limits::max()); SearchResultProto expected_join_search_result_proto; expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); // Person 1 with message SearchResultProto::ResultProto* result_proto = expected_join_search_result_proto.mutable_results()->Add(); *result_proto->mutable_document() = person1; *result_proto->mutable_joined_results()->Add()->mutable_document() = message; // Person 2 without children *expected_join_search_result_proto.mutable_results() ->Add() ->mutable_document() = person2; SearchResultProto search_result_proto3 = icing.Search( search_spec3, ScoringSpecProto::default_instance(), result_spec3); EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( expected_join_search_result_proto)); } INSTANTIATE_TEST_SUITE_P( IcingSearchEngineInitializationVersionChangeTest, IcingSearchEngineInitializationVersionChangeTest, testing::Values( // Manually change existing data set's version to kVersion + 1. When // initializing, it will detect "rollback". version_util::VersionInfo( /*version_in=*/version_util::kVersion + 1, /*max_version_in=*/version_util::kVersion + 1), // Manually change existing data set's version to kVersion - 1 and // max_version to kVersion - 1. When initializing, it will detect // "upgrade". version_util::VersionInfo( /*version_in=*/version_util::kVersion - 1, /*max_version_in=*/version_util::kVersion - 1), // Manually change existing data set's version to kVersion - 1 and // max_version to kVersion. When initializing, it will detect "roll // forward". version_util::VersionInfo( /*version_in=*/version_util::kVersion - 1, /*max_version_in=*/version_util::kVersion), // Manually change existing data set's version to 0 and max_version to // 0. When initializing, it will detect "version 0 upgrade". // // Note: in reality, version 0 won't be written into version file, but // it is ok here since it is hack to simulate version 0 situation. version_util::VersionInfo( /*version_in=*/0, /*max_version_in=*/0), // Manually change existing data set's version to 0 and max_version to // kVersion. When initializing, it will detect "version 0 roll forward". // // Note: in reality, version 0 won't be written into version file, but // it is ok here since it is hack to simulate version 0 situation. version_util::VersionInfo( /*version_in=*/0, /*max_version_in=*/version_util::kVersion))); } // namespace } // namespace lib } // namespace icing