diff options
author | Tim Barron <tjbarron@google.com> | 2021-06-30 20:23:15 +0000 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2021-07-02 04:02:00 +0000 |
commit | 0dc99ac82a5cb45a6795d535d4df492f7f44b686 (patch) | |
tree | 49edf1ba008b63bfa2ed8276d421d5ca6f62ec1b | |
parent | 672648070b7d9c7e066d941f52f8a4b682cf963f (diff) | |
download | icing-0dc99ac82a5cb45a6795d535d4df492f7f44b686.tar.gz |
Merge remote-tracking branch 'goog/androidx-platform-dev'
Per yamasani@'s request, I have broken up ag/15113841
[I1d9d017f6d225c292295b2fbccd42ea731ef40c0] into smaller chunks to
simplify review.
This change should only include the changes for:
- Switch DocumentStore to using PortableFileBackedProtoLog instead of
FileBackedProtoLog.
I've made one additional change from the state of the relevant code
compared to ag/15113841. Per discussion with yamasani@ and adorokhine@,
we decided to skip the migration and just accept data loss given the
small population of devices that will have existing AppSearch data in
system_server. The old migration code still exists, but is behind the
ENABLE_V1_MIGRATION preprocessor macro which will not be defined.
Bug: 185806837
Test: Presubmit
Change-Id: I589fa722bf2695127a127db550113d937d5590f4
-rw-r--r-- | icing/file/file-backed-proto-log.h | 49 | ||||
-rw-r--r-- | icing/file/file-backed-proto-log_benchmark.cc | 40 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log.h | 40 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log_benchmark.cc | 40 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log_test.cc | 20 | ||||
-rw-r--r-- | icing/icing-search-engine_benchmark.cc | 114 | ||||
-rw-r--r-- | icing/icing-search-engine_test.cc | 285 | ||||
-rw-r--r-- | icing/portable/platform.h | 16 | ||||
-rw-r--r-- | icing/store/document-log-creator.cc | 206 | ||||
-rw-r--r-- | icing/store/document-log-creator.h | 77 | ||||
-rw-r--r-- | icing/store/document-store.cc | 49 | ||||
-rw-r--r-- | icing/store/document-store.h | 29 | ||||
-rw-r--r-- | icing/store/document-store_benchmark.cc | 87 | ||||
-rw-r--r-- | icing/store/document-store_test.cc | 241 |
14 files changed, 1148 insertions(+), 145 deletions(-)
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index 9ccd81b..b2b37e8 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -80,23 +80,6 @@ namespace icing { namespace lib { -namespace { - -bool IsEmptyBuffer(const char* buffer, int size) { - return std::all_of(buffer, buffer + size, - [](const char byte) { return byte == 0; }); -} - -// Helper function to get stored proto size from the metadata. -// Metadata format: 8 bits magic + 24 bits size -int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } - -// Helper function to get stored proto magic from the metadata. -// Metadata format: 8 bits magic + 24 bits size -uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } - -} // namespace - template <typename ProtoT> class FileBackedProtoLog { public: @@ -402,6 +385,28 @@ class FileBackedProtoLog { const Filesystem* filesystem, const std::string& file_path, Crc32 initial_crc, int64_t start, int64_t end); + static bool IsEmptyBuffer(const char* buffer, int size) { + return std::all_of(buffer, buffer + size, + [](const char byte) { return byte == 0; }); + } + + // Helper function to get stored proto size from the metadata. + // Metadata format: 8 bits magic + 24 bits size + static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } + + // Helper function to get stored proto magic from the metadata. + // Metadata format: 8 bits magic + 24 bits size + static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } + + // Reads out the metadata of a proto located at file_offset from the file. + // + // Returns: + // Proto's metadata on success + // OUT_OF_RANGE_ERROR if file_offset exceeds file_size + // INTERNAL_ERROR if the metadata is invalid or any IO errors happen + static libtextclassifier3::StatusOr<int> ReadProtoMetadata( + MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); + // Magic number added in front of every proto. 
Used when reading out protos // as a first check for corruption in each entry in the file. Even if there is // a corruption, the best we can do is roll back to our last recovery point @@ -429,15 +434,6 @@ class FileBackedProtoLog { ScopedFd fd_; const Filesystem* const filesystem_; const std::string file_path_; - - // Reads out the metadata of a proto located at file_offset from the file. - // - // Returns: - // Proto's metadata on success - // OUT_OF_RANGE_ERROR if file_offset exceeds file_size - // INTERNAL_ERROR if the metadata is invalid or any IO errors happen - static libtextclassifier3::StatusOr<int> ReadProtoMetadata( - MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); std::unique_ptr<Header> header_; }; @@ -573,6 +569,7 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem, ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum, ComputeChecksum(filesystem, file_path, Crc32(), sizeof(Header), file_size)); + // Double check that the log checksum is the same as the one that was // persisted last time. If not, we start recovery logic. if (header->log_checksum != calculated_log_checksum.Get()) { diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc index 766cc64..c09fd5a 100644 --- a/icing/file/file-backed-proto-log_benchmark.cc +++ b/icing/file/file-backed-proto-log_benchmark.cc @@ -164,6 +164,46 @@ BENCHMARK(BM_Read) // 16MiB, and we need some extra space for the // rest of the document properties +static void BM_Erase(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s", GetTestTempDir().c_str(), "/proto.log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + state.PauseTiming(); + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + state.ResumeTiming(); + + testing::DoNotOptimize(proto_log->EraseProto(write_offset)); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Erase); + static void BM_ComputeChecksum(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = GetTestTempDir() + "/proto.log"; diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 000ab3d..825b763 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -83,28 +83,6 @@ namespace icing { namespace lib { -namespace { - -// Number of bytes we reserve for the heading at the beginning of the proto log. -// We reserve this so the header can grow without running into the contents of -// the proto log, triggering an unnecessary migration of the data. -constexpr int kHeaderReservedBytes = 256; - -bool IsEmptyBuffer(const char* buffer, int size) { - return std::all_of(buffer, buffer + size, - [](const char byte) { return byte == 0; }); -} - -// Helper function to get stored proto size from the metadata. 
-// Metadata format: 8 bits magic + 24 bits size -int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } - -// Helper function to get stored proto magic from the metadata. -// Metadata format: 8 bits magic + 24 bits size -uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } - -} // namespace - template <typename ProtoT> class PortableFileBackedProtoLog { public: @@ -135,6 +113,11 @@ class PortableFileBackedProtoLog { : compress(compress_in), max_proto_size(max_proto_size_in) {} }; + // Number of bytes we reserve for the heading at the beginning of the proto + // log. We reserve this so the header can grow without running into the + // contents of the proto log, triggering an unnecessary migration of the data. + static constexpr int kHeaderReservedBytes = 256; + // Header stored at the beginning of the file before the rest of the log // contents. Stores metadata on the log. class Header { @@ -541,6 +524,19 @@ class PortableFileBackedProtoLog { static libtextclassifier3::Status WriteProtoMetadata( const Filesystem* filesystem, int fd, int32_t host_order_metadata); + static bool IsEmptyBuffer(const char* buffer, int size) { + return std::all_of(buffer, buffer + size, + [](const char byte) { return byte == 0; }); + } + + // Helper function to get stored proto size from the metadata. + // Metadata format: 8 bits magic + 24 bits size + static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } + + // Helper function to get stored proto magic from the metadata. + // Metadata format: 8 bits magic + 24 bits size + static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } + // Magic number added in front of every proto. Used when reading out protos // as a first check for corruption in each entry in the file. 
Even if there is // a corruption, the best we can do is roll back to our last recovery point diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index b1dfe12..04ccab0 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -163,6 +163,46 @@ BENCHMARK(BM_Read) ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is // 16MiB, and we need some extra space for the // rest of the document properties + // +static void BM_Erase(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s", GetTestTempDir().c_str(), "/proto.log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + state.PauseTiming(); + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + state.ResumeTiming(); + + testing::DoNotOptimize(proto_log->EraseProto(write_offset)); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Erase); static void BM_ComputeChecksum(benchmark::State& state) { const Filesystem filesystem; diff --git a/icing/file/portable-file-backed-proto-log_test.cc 
b/icing/file/portable-file-backed-proto-log_test.cc index 69b8a1a..b5fee4b 100644 --- a/icing/file/portable-file-backed-proto-log_test.cc +++ b/icing/file/portable-file-backed-proto-log_test.cc @@ -113,7 +113,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) { // With no protos written yet, the log should be minimum the size of the // reserved header space. - ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), kHeaderReservedBytes); + ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), + PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes); } TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) { @@ -417,8 +418,9 @@ TEST_F(PortableFileBackedProtoLogTest, // We still have the corrupted content in our file, we didn't throw // everything out. - EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Gt(kHeaderReservedBytes)); + EXPECT_THAT( + filesystem_.GetFileSize(file_path_.c_str()), + Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes)); } } @@ -456,9 +458,10 @@ TEST_F(PortableFileBackedProtoLogTest, DocumentProto document = DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); std::string serialized_document = document.SerializeAsString(); - ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), kHeaderReservedBytes, - serialized_document.data(), - serialized_document.size())); + ASSERT_TRUE(filesystem_.PWrite( + file_path_.c_str(), + PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes, + serialized_document.data(), serialized_document.size())); Header header = ReadHeader(filesystem_, file_path_); @@ -484,8 +487,9 @@ TEST_F(PortableFileBackedProtoLogTest, EXPECT_TRUE(create_result.recalculated_checksum); // We lost everything, file size is back down to the header. 
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), - Eq(kHeaderReservedBytes)); + EXPECT_THAT( + filesystem_.GetFileSize(file_path_.c_str()), + Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes)); // At least the log is no longer dirty. Header header = ReadHeader(filesystem_, file_path_); diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index b437724..316b74f 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -577,6 +577,120 @@ void BM_RepeatedPut(benchmark::State& state) { // cap the limit to 1 << 18. BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18); +// This is different from BM_RepeatedPut since we're just trying to benchmark +// one Put call, not thousands of them at once. +void BM_Put(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message")) + .Build(); + + // Create the index. 
+ IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document + DocumentProto document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .SetUri("uri") + .Build(); + + for (auto s : state) { + benchmark::DoNotOptimize(icing->Put(document)); + } +} +BENCHMARK(BM_Put); + +void BM_Get(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message")) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document + DocumentProto document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .SetUri("uri") + .Build(); + + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + for (auto s : state) { + benchmark::DoNotOptimize( + icing->Get("namespace", "uri", GetResultSpecProto::default_instance())); + } +} +BENCHMARK(BM_Get); + +void BM_Delete(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. 
+ SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message")) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document + DocumentProto document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .SetUri("uri") + .Build(); + + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + for (auto s : state) { + state.PauseTiming(); + icing->Put(document); + state.ResumeTiming(); + + benchmark::DoNotOptimize(icing->Delete("namespace", "uri")); + } +} +BENCHMARK(BM_Delete); + } // namespace } // namespace lib diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index c1de0f0..752e0e2 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -42,6 +42,7 @@ #include "icing/schema-builder.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" +#include "icing/store/document-log-creator.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/jni-test-helpers.h" @@ -100,9 +101,26 @@ constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = StringIndexingConfig_TokenizerType_Code_NONE; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN; +PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( + Filesystem filesystem, const std::string& file_path) { + 
PortableFileBackedProtoLog<DocumentWrapper>::Header header; + filesystem.PRead(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header), + /*offset=*/0); + return header; +} + +void WriteDocumentLogHeader( + Filesystem filesystem, const std::string& file_path, + PortableFileBackedProtoLog<DocumentWrapper>::Header& header) { + filesystem.Write(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header)); +} + // For mocking purpose, we allow tests to provide a custom Filesystem. class TestIcingSearchEngine : public IcingSearchEngine { public: @@ -990,7 +1008,8 @@ TEST_F(IcingSearchEngineTest, SetSchema) { HasSubstr("'Photo' not found")); } -TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { +TEST_F(IcingSearchEngineTest, + SetSchemaTriggersIndexRestorationAndReturnsOk) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -2074,7 +2093,8 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) { // Deletes document1 ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); const std::string document_log_path = - icing_options.base_dir() + "/document_dir/document_log"; + icing_options.base_dir() + "/document_dir/" + + DocumentLogCreator::GetDocumentLogFilename(); int64_t document_log_size_before = filesystem()->GetFileSize(document_log_path.c_str()); ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); @@ -3438,8 +3458,8 @@ TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) { EqualsProto(expected_get_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); const std::string corrupt_data = "1234"; 
EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(), corrupt_data.data(), corrupt_data.size())); @@ -5616,15 +5636,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) { // 2. Delete the last document from the document log { - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); filesystem()->DeleteFile(document_log_file.c_str()); - ICING_ASSERT_OK_AND_ASSIGN(auto create_result, - FileBackedProtoLog<DocumentWrapper>::Create( - filesystem(), document_log_file.c_str(), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true))); - std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log = + ICING_ASSERT_OK_AND_ASSIGN( + auto create_result, + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem(), document_log_file.c_str(), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true))); + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log = std::move(create_result.proto_log); document = DocumentBuilder(document).SetUri("fake_type/0").Build(); @@ -5689,15 +5710,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) { // 2. Delete the last two documents from the document log. 
{ - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); filesystem()->DeleteFile(document_log_file.c_str()); - ICING_ASSERT_OK_AND_ASSIGN(auto create_result, - FileBackedProtoLog<DocumentWrapper>::Create( - filesystem(), document_log_file.c_str(), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true))); - std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log = + ICING_ASSERT_OK_AND_ASSIGN( + auto create_result, + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem(), document_log_file.c_str(), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true))); + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log = std::move(create_result.proto_log); document = DocumentBuilder(document).SetUri("fake_type/0").Build(); @@ -5994,8 +6016,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) { // Append a non-checksummed document. This will mess up the checksum of the // proto log, forcing it to rewind and later return a DATA_LOSS error. const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str()); filesystem()->PWrite(document_log_file.c_str(), file_size, @@ -6045,31 +6067,47 @@ TEST_F(IcingSearchEngineTest, .SetSchema("Message") .AddStringProperty("body", "message body") .Build(); + + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + int64_t corruptible_offset; + { // Initialize and put a document. 
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // There's some space at the beginning of the file (e.g. header, kmagic, + // etc) that is necessary to initialize the FileBackedProtoLog. We can't + // corrupt that region, so we need to figure out the offset at which + // documents will be written to - which is the file size after + // initialization. + corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); } { - // Modify the document log checksum to trigger a complete document log - // rewind. - const std::string document_log_file = - absl_ports::StrCat(GetDocumentDir(), "/document_log"); - - FileBackedProtoLog<DocumentWrapper>::Header document_log_header; - filesystem()->PRead(document_log_file.c_str(), &document_log_header, - sizeof(FileBackedProtoLog<DocumentWrapper>::Header), - /*offset=*/0); - // Set a garbage checksum. - document_log_header.log_checksum = 10; - document_log_header.header_checksum = - document_log_header.CalculateHeaderChecksum(); - filesystem()->PWrite(document_log_file.c_str(), /*offset=*/0, - &document_log_header, - sizeof(FileBackedProtoLog<DocumentWrapper>::Header)); + // "Corrupt" the content written in the log. Make the corrupt document + // smaller than our original one so we don't accidentally write past our + // file. 
+ DocumentProto document = + DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem()->PWrite( + document_log_file.c_str(), corruptible_offset, + serialized_document.data(), serialized_document.size())); + + PortableFileBackedProtoLog<DocumentWrapper>::Header header = + ReadDocumentLogHeader(*filesystem(), document_log_file); + + // Set dirty bit to true to reflect that something changed in the log. + header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteDocumentLogHeader(*filesystem(), document_log_file, header); } { @@ -7182,6 +7220,177 @@ TEST_F(IcingSearchEngineTest, CJKSnippetTest) { EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); } +// We skip this test case when we're running in a jni_test since the data files +// will be stored in the android-instrumented storage location, rather than the +// normal cc_library runfiles directory. To get that storage location, it's +// recommended to use the TestStorage APIs which handles different API +// levels/absolute vs relative/etc differences. Since that's only accessible on +// the java-side, and I haven't figured out a way to pass that directory path to +// this native side yet, we're just going to disable this. The functionality is +// already well-tested across 4 different emulated OS's so we're not losing much +// test coverage here. +#ifndef ICING_JNI_TEST +// Disable backwards compat test. This test is enabled in google3, but disabled +// in jetpack/framework because we didn't want to keep the binary testdata files +// in our repo. 
+#define DISABLE_BACKWARDS_COMPAT_TEST +#ifndef DISABLE_BACKWARDS_COMPAT_TEST +TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { + // Copy the testdata files into our IcingSearchEngine directory + std::string dir_without_portable_log; + if (IsAndroidX86()) { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_x86"); + } else if (IsAndroidArm()) { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_arm"); + } else if (IsIosPlatform()) { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_ios"); + } else { + dir_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_linux"); + } + + // Create dst directory that we'll initialize the IcingSearchEngine over. + std::string base_dir = GetTestBaseDir() + "_migrate"; + ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true); + ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true); + + ASSERT_TRUE(filesystem()->CopyDirectory(dir_without_portable_log.c_str(), + base_dir.c_str(), + /*recursive=*/true)); + + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(base_dir); + + IcingSearchEngine icing(icing_options, GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + + // Set up schema, this is the one used to 
validate documents in the testdata + // files. Do not change unless you're also updating the testdata files. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + // Make sure our schema is still the same as we expect. If not, there's + // definitely no way we're getting the documents back that we expect. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = schema; + ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); + + // These are the documents that are stored in the testdata files. Do not + // change unless you're also updating the testdata files. + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .AddStringProperty("body", "bar") + .Build(); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("email") + .SetCreationTimestampMs(20) + .SetScore(321) + .AddStringProperty("body", "baz bat") + .Build(); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(30) + .SetScore(123) + .AddStringProperty("subject", "phoo") + .Build(); + + // Document 1 and 3 were put normally, and document 2 was deleted in our + // testdata files. 
+ EXPECT_THAT(icing + .Get(document1.namespace_(), document1.uri(), + GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document1)); + EXPECT_THAT(icing + .Get(document2.namespace_(), document2.uri(), + GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT(icing + .Get(document3.namespace_(), document3.uri(), + GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document3)); + + // Searching for "foo" should get us document1. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("foo"); + + SearchResultProto expected_document1; + expected_document1.mutable_status()->set_code(StatusProto::OK); + *expected_document1.mutable_results()->Add()->mutable_document() = document1; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_document1)); + + // Searching for "baz" would've gotten us document2, except it got deleted. + // Make sure that it's cleared from our index too. + search_spec.set_query("baz"); + + SearchResultProto expected_no_documents; + expected_no_documents.mutable_status()->set_code(StatusProto::OK); + + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_no_documents)); + + // Searching for "phoo" should get us document3. 
+ search_spec.set_query("phoo"); + + SearchResultProto expected_document3; + expected_document3.mutable_status()->set_code(StatusProto::OK); + *expected_document3.mutable_results()->Add()->mutable_document() = document3; + + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_document3)); +} +#endif // DISABLE_BACKWARDS_COMPAT_TEST +#endif // !ICING_JNI_TEST + } // namespace } // namespace lib } // namespace icing diff --git a/icing/portable/platform.h b/icing/portable/platform.h index 8712835..150eede 100644 --- a/icing/portable/platform.h +++ b/icing/portable/platform.h @@ -34,11 +34,19 @@ inline bool IsReverseJniTokenization() { return false; } -// Whether the running test is an Android test. -inline bool IsAndroidPlatform() { -#if defined(__ANDROID__) +// Whether we're running on android_x86 +inline bool IsAndroidX86() { +#if defined(__ANDROID__) && defined(__i386__) return true; -#endif // defined(__ANDROID__) +#endif // defined(__ANDROID__) && defined(__i386__) + return false; +} + +// Whether we're running on android_armeabi-v7a +inline bool IsAndroidArm() { +#if defined(__ANDROID__) && defined(__arm__) + return true; +#endif // defined(__ANDROID__) && defined(__arm__) return false; } diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc new file mode 100644 index 0000000..a035f93 --- /dev/null +++ b/icing/store/document-log-creator.cc @@ -0,0 +1,206 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-log-creator.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/logging.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Base filename of the document log; version suffixes are appended below.
+constexpr char kDocumentLogFilename[] = "document_log";
+
+std::string DocumentLogFilenameV0() {
+  // Originally only had this one version, no suffix.
+  return kDocumentLogFilename;
+}
+
+std::string DocumentLogFilenameV1() {
+  return absl_ports::StrCat(kDocumentLogFilename, "_v1");
+}
+
+std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
+}
+
+std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
+}
+
+}  // namespace
+
+std::string DocumentLogCreator::GetDocumentLogFilename() {
+  // This should always return the latest version of the document log in use.
+  // The current latest version is V1.
+ return DocumentLogFilenameV1(); +} + +libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> +DocumentLogCreator::Create(const Filesystem* filesystem, + const std::string& base_dir) { + bool v0_exists = + filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str()); + bool regen_derived_files = false; + +#ifdef ENABLE_V1_MIGRATION + bool v1_exists = + filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str()); + + if (v0_exists && !v1_exists) { + ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir)); + + // Need to regenerate derived files since documents may be written to a + // different file offset in the log. + regen_derived_files = true; + } else if (!v1_exists) { + // First time initializing a v1 log. There are no existing derived files at + // this point, so we should generate some. "regenerate" here also means + // "generate for the first time", i.e. we shouldn't expect there to be any + // existing derived files. + regen_derived_files = true; + } +#else // !ENABLE_V1_MIGRATION + if (v0_exists) { + // If migration from v0 to v1 is not enabled, then simply delete the v0 file + // and treat this as if it's our first time initializing a v1 log. 
+    regen_derived_files = true;
+    filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str());
+  }
+#endif  // ENABLE_V1_MIGRATION
+
+  ICING_ASSIGN_OR_RETURN(
+      PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
+          log_create_result,
+      PortableFileBackedProtoLog<DocumentWrapper>::Create(
+          filesystem, MakeDocumentLogFilenameV1(base_dir),
+          PortableFileBackedProtoLog<DocumentWrapper>::Options(
+              /*compress_in=*/true)));
+
+  CreateResult create_result = {std::move(log_create_result),
+                                regen_derived_files};
+  return create_result;
+}
+
+libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
+    const Filesystem* filesystem, const std::string& base_dir) {
+  ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
+
+  // Our v0 proto log was non-portable, create it so we can read protos out from
+  // it.
+  auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
+      filesystem, MakeDocumentLogFilenameV0(base_dir),
+      FileBackedProtoLog<DocumentWrapper>::Options(
+          /*compress_in=*/true));
+  if (!v0_create_result_or.ok()) {
+    return absl_ports::Annotate(
+        v0_create_result_or.status(),
+        "Failed to initialize v0 document log "
+        "while migrating.");
+  }
+  FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
+      std::move(v0_create_result_or).ValueOrDie();
+  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
+      std::move(v0_create_result.proto_log);
+
+  // Create a v1 portable proto log that we will write our protos to.
+ auto v1_create_result_or = + PortableFileBackedProtoLog<DocumentWrapper>::Create( + filesystem, MakeDocumentLogFilenameV1(base_dir), + PortableFileBackedProtoLog<DocumentWrapper>::Options( + /*compress_in=*/true)); + if (!v1_create_result_or.ok()) { + return absl_ports::Annotate( + v1_create_result_or.status(), + "Failed to initialize v1 document log while migrating."); + } + PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result = + std::move(v1_create_result_or).ValueOrDie(); + std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log = + std::move(v1_create_result.proto_log); + + // Dummy empty document to be used when copying over deleted documents. + DocumentProto empty_document; + + // Start reading out from the old log and putting them in the new log. + auto iterator = v0_proto_log->GetIterator(); + auto iterator_status = iterator.Advance(); + while (iterator_status.ok()) { + libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or = + v0_proto_log->ReadProto(iterator.GetOffset()); + + bool deleted_document = false; + DocumentWrapper document_wrapper; + if (absl_ports::IsNotFound(document_wrapper_or.status())) { + // Proto was erased, we can skip copying this into our new log. + *document_wrapper.mutable_document() = empty_document; + deleted_document = true; + } else if (!document_wrapper_or.ok()) { + // Some real error, pass up + return document_wrapper_or.status(); + } else { + document_wrapper = std::move(document_wrapper_or).ValueOrDie(); + } + + auto offset_or = v1_proto_log->WriteProto(document_wrapper); + if (!offset_or.ok()) { + return absl_ports::Annotate( + offset_or.status(), + "Failed to write proto to v1 document log while migrating."); + } + + // If the original document was deleted, erase the proto we just wrote. + // We do this to maintain the document_ids, i.e. we still want document_id 2 + // to point to a deleted document even though we may not have the document + // contents anymore. 
DocumentStore guarantees that the document_ids don't + // change unless an Optimize is triggered. + if (deleted_document) { + int64_t offset = offset_or.ValueOrDie(); + auto erased_status = v1_proto_log->EraseProto(offset); + if (!erased_status.ok()) { + return absl_ports::Annotate( + erased_status, + "Failed to erase proto in v1 document log while migrating."); + } + } + + iterator_status = iterator.Advance(); + } + + // Close out our file log pointers. + v0_proto_log.reset(); + v1_proto_log.reset(); + + return libtextclassifier3::Status::OK; +} + +} // namespace lib +} // namespace icing diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h new file mode 100644 index 0000000..51cf497 --- /dev/null +++ b/icing/store/document-log-creator.h @@ -0,0 +1,77 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_STORE_DOCUMENT_LOG_CREATOR_H_ +#define ICING_STORE_DOCUMENT_LOG_CREATOR_H_ + +#include <string> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" +#include "icing/proto/document_wrapper.pb.h" + +namespace icing { +namespace lib { + +// Handles creation of the document log and any underlying migrations that may +// be necessary. 
+class DocumentLogCreator { + public: + struct CreateResult { + // The create result passed up from the PortableFileBackedProtoLog::Create. + // Contains the document log. + PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result; + + // Whether the caller needs to also regenerate/generate any derived files + // based off of the initialized document log. + bool regen_derived_files; + }; + + // Creates the document log in the base_dir. Will create one if it doesn't + // already exist. + // + // This also handles any potential migrations from old document log versions. + // At the end of this call, the most up-to-date log will be returned and will + // be usable. + // + // Returns: + // CreateResult on success. + // INTERNAL on any I/O error. + static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create( + const Filesystem* filesystem, const std::string& base_dir); + + // Returns the filename of the document log, without any directory prefixes. + // Used mainly for testing purposes. + static std::string GetDocumentLogFilename(); + + private: + // Handles migrating a v0 document log (not portable) to a v1 document log + // (portable). This will initialize the log in the beginning, and close it + // when migration is done. Callers will need to reinitialize the log on their + // own. + // + // Returns: + // OK on success. + // INVALID_ARGUMENT if some invalid option was passed to the document log. + // INTERNAL on I/O error. 
+ static libtextclassifier3::Status MigrateFromV0ToV1( + const Filesystem* filesystem, const std::string& base_dir); +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_DOCUMENT_LOG_CREATOR_H_ diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 4e63b90..907bace 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -33,6 +33,7 @@ #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" +#include "icing/file/portable-file-backed-proto-log.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" @@ -44,6 +45,7 @@ #include "icing/store/document-associated-score-data.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/document-log-creator.h" #include "icing/store/key-mapper.h" #include "icing/store/namespace-id.h" #include "icing/store/usage-store.h" @@ -62,7 +64,6 @@ namespace { // Used in DocumentId mapper to mark a document as deleted constexpr int64_t kDocDeletedFlag = -1; -constexpr char kDocumentLogFilename[] = "document_log"; constexpr char kDocumentIdMapperFilename[] = "document_id_mapper"; constexpr char kDocumentStoreHeaderFilename[] = "document_store_header"; constexpr char kScoreCacheFilename[] = "score_cache"; @@ -93,10 +94,6 @@ std::string MakeDocumentIdMapperFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename); } -std::string MakeDocumentLogFilename(const std::string& base_dir) { - return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename); -} - std::string MakeScoreCacheFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename); } @@ -224,30 +221,36 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( libtextclassifier3::StatusOr<DataLoss> 
DocumentStore::Initialize( bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats) { - auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create( - filesystem_, MakeDocumentLogFilename(base_dir_), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true)); + auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_); + // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. if (!create_result_or.ok()) { ICING_LOG(ERROR) << create_result_or.status().error_message() - << "\nFailed to initialize DocumentLog"; + << "\nFailed to initialize DocumentLog."; return create_result_or.status(); } - FileBackedProtoLog<DocumentWrapper>::CreateResult create_result = + DocumentLogCreator::CreateResult create_result = std::move(create_result_or).ValueOrDie(); - document_log_ = std::move(create_result.proto_log); - if (force_recovery_and_revalidate_documents || - create_result.has_data_loss()) { - if (create_result.has_data_loss() && initialize_stats != nullptr) { + document_log_ = std::move(create_result.log_create_result.proto_log); + + if (create_result.regen_derived_files || + force_recovery_and_revalidate_documents || + create_result.log_create_result.has_data_loss()) { + // We can't rely on any existing derived files. Recreate them from scratch. + // Currently happens if: + // 1) This is a new log and we don't have derived files yet + // 2) Client wanted us to force a regeneration. + // 3) Log has some data loss, can't rely on existing derived data. 
+ if (create_result.log_create_result.has_data_loss() && + initialize_stats != nullptr) { ICING_LOG(WARNING) << "Data loss in document log, regenerating derived files."; initialize_stats->set_document_store_recovery_cause( InitializeStatsProto::DATA_LOSS); - if (create_result.data_loss == DataLoss::PARTIAL) { + if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) { // Ground truth is partially lost. initialize_stats->set_document_store_data_status( InitializeStatsProto::PARTIAL_LOSS); @@ -257,10 +260,16 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( InitializeStatsProto::COMPLETE_LOSS); } } + std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles(force_recovery_and_revalidate_documents); - if (initialize_stats != nullptr) { + if (initialize_stats != nullptr && + (force_recovery_and_revalidate_documents || + create_result.log_create_result.has_data_loss())) { + // Only consider it a recovery if the client forced a recovery or there + // was data loss. Otherwise, this could just be the first time we're + // initializing and generating derived files. 
initialize_stats->set_document_store_recovery_latency_ms( document_recovery_timer->GetElapsedMilliseconds()); } @@ -270,7 +279,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( return status; } } else { - if (!InitializeDerivedFiles().ok()) { + if (!InitializeExistingDerivedFiles().ok()) { ICING_VLOG(1) << "Couldn't find derived files or failed to initialize them, " "regenerating derived files for DocumentStore."; @@ -296,10 +305,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( initialize_stats->set_num_documents(document_id_mapper_->num_elements()); } - return create_result.data_loss; + return create_result.log_create_result.data_loss; } -libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() { +libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { if (!HeaderExists()) { // Without a header, we don't know if things are consistent between each // other so the caller should just regenerate everything from ground diff --git a/icing/store/document-store.h b/icing/store/document-store.h index b0cd1ce..79d99d4 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -26,6 +26,7 @@ #include "icing/file/file-backed-proto-log.h" #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" @@ -438,7 +439,7 @@ class DocumentStore { // A log used to store all documents, it serves as a ground truth of doc // store. key_mapper_ and document_id_mapper_ can be regenerated from it. 
-  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_;
+  std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
 
   // Key (namespace + uri) to DocumentId mapping
   std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
@@ -495,11 +496,35 @@ class DocumentStore {
       bool force_recovery_and_revalidate_documents,
       InitializeStatsProto* initialize_stats);
 
+  // Initializes a new DocumentStore and sets up any underlying files.
+  //
+  // Returns:
+  //   Data loss status on success, effectively always DataLoss::NONE
+  //   INTERNAL on I/O error
+  libtextclassifier3::StatusOr<DataLoss> InitializeNewStore(
+      InitializeStatsProto* initialize_stats);
+
+  // Initializes a DocumentStore over an existing directory of files.
+  //
+  // stats will be set if non-null
+  //
+  // Returns:
+  //   Data loss status on success
+  //   INTERNAL on I/O error
+  libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore(
+      bool force_recovery_and_revalidate_documents,
+      InitializeStatsProto* initialize_stats);
+
+  libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1(
+      InitializeStatsProto* initialize_stats);
+
   // Creates sub-components and verifies the integrity of each sub-component.
+  // This assumes that the underlying files already exist, and will return
+  // an error if it doesn't find what it's expecting.
   //
   // Returns an error if subcomponents failed to initialize successfully.
   // INTERNAL_ERROR on IO error
-  libtextclassifier3::Status InitializeDerivedFiles();
+  libtextclassifier3::Status InitializeExistingDerivedFiles();
 
   // Re-generates all files derived from the ground truth: the document log.
// diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index f68e115..ce608fc 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -168,6 +168,93 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) { } BENCHMARK(BM_DoesDocumentExistBenchmark); +void BM_Put(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + + for (auto s : state) { + // It's ok that this is the same document over and over. We'll create a new + // document_id for it and still insert the proto into the underlying log. 
+ benchmark::DoNotOptimize(document_store->Put(document)); + } +} +BENCHMARK(BM_Put); + +void BM_GetSameDocument(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", "uri"))); + + for (auto s : state) { + benchmark::DoNotOptimize(document_store->Get("namespace", "uri")); + } +} +BENCHMARK(BM_GetSameDocument); + +void BM_Delete(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + + for (auto s : state) { + state.PauseTiming(); + ICING_ASSERT_OK(document_store->Put(document)); + state.ResumeTiming(); + + benchmark::DoNotOptimize(document_store->Delete("namespace", "uri")); + } +} +BENCHMARK(BM_Delete); + } // namespace } // namespace lib diff 
--git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index ad3b7c4..3ed4c4e 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -15,6 +15,7 @@ #include "icing/store/document-store.h" #include <cstdint> +#include <filesystem> #include <limits> #include <memory> #include <string> @@ -40,6 +41,7 @@ #include "icing/store/corpus-id.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/document-log-creator.h" #include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" @@ -105,6 +107,22 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri, return usage_report; } +PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( + Filesystem filesystem, const std::string& file_path) { + PortableFileBackedProtoLog<DocumentWrapper>::Header header; + filesystem.PRead(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header), + /*offset=*/0); + return header; +} + +void WriteDocumentLogHeader( + Filesystem filesystem, const std::string& file_path, + PortableFileBackedProtoLog<DocumentWrapper>::Header& header) { + filesystem.Write(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header)); +} + class DocumentStoreTest : public ::testing::Test { protected: DocumentStoreTest() @@ -452,14 +470,18 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth int64_t document_log_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT( document_store->Delete("nonexistent_namespace", "nonexistent_uri"), 
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t document_log_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } @@ -538,13 +560,17 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth int64_t document_log_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t document_log_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } @@ -607,7 +633,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); document_log_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); } // Destructors should update checksum and persist all data to file. CorruptDocStoreHeaderChecksumFile(); @@ -621,7 +649,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { // Make sure we didn't add anything to the ground truth after we recovered. 
int64_t document_log_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), @@ -730,13 +760,17 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth int64_t document_log_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t document_log_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } @@ -809,7 +843,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); document_log_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); } // Destructors should update checksum and persist all data to file. CorruptDocStoreHeaderChecksumFile(); @@ -823,7 +859,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { // Make sure we didn't add anything to the ground truth after we recovered. 
int64_t document_log_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(document_store->Get(email_document_id), @@ -901,7 +939,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { IsOkAndHolds(EqualsProto(message_document))); document_log_size_before = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); } // Destructors should update checksum and persist all data to file. CorruptDocStoreHeaderChecksumFile(); @@ -923,7 +963,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { // Make sure we didn't add anything to the ground truth after we recovered. int64_t document_log_size_after = filesystem_.GetFileSize( - absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(document_store->Get(email_document_id), @@ -968,7 +1010,9 @@ TEST_F(DocumentStoreTest, OptimizeInto) { ICING_ASSERT_OK(doc_store->Put(document2)); ICING_ASSERT_OK(doc_store->Put(document3)); - std::string original_document_log = document_store_dir_ + "/document_log"; + std::string original_document_log = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); + int64_t original_size = filesystem_.GetFileSize(original_document_log.c_str()); @@ -979,7 +1023,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { HasSubstr("directory is the same"))); std::string optimized_dir = document_store_dir_ + "_optimize"; - std::string optimized_document_log = 
optimized_dir + "/document_log"; + std::string optimized_document_log = + optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename(); // Validates that the optimized document log has the same size if nothing is // deleted @@ -1067,8 +1112,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) { DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); filesystem_.PWrite(document_log_file.c_str(), file_size, serialized_document.data(), serialized_document.size()); @@ -2919,8 +2964,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) { DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); filesystem_.PWrite(document_log_file.c_str(), file_size, serialized_document.data(), serialized_document.size()); @@ -3043,7 +3088,9 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) { const std::string serialized_document = document.SerializeAsString(); const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str(); int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); 
filesystem_.PWrite(document_log_file.c_str(), file_size, serialized_document.data(), serialized_document.size()); @@ -3060,8 +3107,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) { TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { int64_t corruptible_offset; - const std::string document_log_file = - absl_ports::StrCat(document_store_dir_, "/document_log"); + const std::string document_log_file = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( @@ -3088,8 +3135,30 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { // "Corrupt" the persisted content written in the log. We can't recover if // the persisted data was corrupted. std::string corruption = "abc"; - filesystem_.PWrite(document_log_file.c_str(), /*offset=*/corruptible_offset, - corruption.data(), corruption.size()); + filesystem_.PWrite(document_log_file.c_str(), + /*offset=*/corruptible_offset, corruption.data(), + corruption.size()); + + { + // "Corrupt" the content written in the log. Make the corrupt document + // smaller than our original one so we don't accidentally write past our + // file. + DocumentProto document = + DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem_.PWrite( + document_log_file.c_str(), corruptible_offset, + serialized_document.data(), serialized_document.size())); + + PortableFileBackedProtoLog<DocumentWrapper>::Header header = + ReadDocumentLogHeader(filesystem_, document_log_file); + + // Set dirty bit to true to reflect that something changed in the log. + header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteDocumentLogHeader(filesystem_, document_log_file, header); + } // Successfully recover from a data loss issue. 
ICING_ASSERT_OK_AND_ASSIGN( @@ -3106,8 +3175,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { // the document store header. // // This causes a problem now because this cl changes behavior to not consider an -// InitializeDerivedFiles failure to be a recovery if there is nothing to -// recover because the doocument store is empty. +// InitializeExistingDerivedFiles failure to be a recovery if there is nothing +// to recover because the doocument store is empty. #define DISABLE_BACKWARDS_COMPAT_TEST #ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { @@ -3667,6 +3736,128 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { } } +#ifndef DISABLE_BACKWARDS_COMPAT_TEST +TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { + // Set up schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + std::string schema_store_dir = schema_store_dir_ + "_migrate"; + filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); + + ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + + // Create dst directory that we'll initialize the DocumentStore over. 
+ std::string document_store_dir = document_store_dir_ + "_migrate"; + ASSERT_THAT( + filesystem_.DeleteDirectoryRecursively(document_store_dir.c_str()), true); + ASSERT_THAT( + filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()), true); + + // Copy the testdata files into our DocumentStore directory + std::string document_store_without_portable_log; + if (IsAndroidX86()) { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_x86/document_dir"); + } else if (IsAndroidArm()) { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_android_arm/document_dir"); + } else if (IsIosPlatform()) { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_ios/document_dir"); + } else { + document_store_without_portable_log = GetTestFilePath( + "icing/testdata/not_portable_log/" + "icing_search_engine_linux/document_dir"); + } + + ASSERT_TRUE(filesystem_.CopyDirectory( + document_store_without_portable_log.c_str(), document_store_dir.c_str(), + /*recursive=*/true)); + + // Initialize the DocumentStore over our copied files. + InitializeStatsProto initialize_stats; + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_, + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false, + &initialize_stats)); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + // These are the documents that are stored in the testdata files. Do not + // change unless you're also updating the testdata files. 
+ DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .AddStringProperty("body", "bar") + .Build(); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("email") + .SetCreationTimestampMs(20) + .SetScore(321) + .AddStringProperty("body", "baz bat") + .Build(); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(30) + .SetScore(123) + .AddStringProperty("subject", "phoo") + .Build(); + + // Check that we didn't lose anything. A migration also doesn't technically + // count as a recovery. + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause()); + + // Document 1 and 3 were put normally, and document 2 was deleted in our + // testdata files. + // + // Check by namespace, uri + EXPECT_THAT(document_store->Get(document1.namespace_(), document1.uri()), + IsOkAndHolds(EqualsProto(document1))); + EXPECT_THAT(document_store->Get(document2.namespace_(), document2.uri()), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(document_store->Get(document3.namespace_(), document3.uri()), + IsOkAndHolds(EqualsProto(document3))); + + // Check by document_id + EXPECT_THAT(document_store->Get(/*document_id=*/0), + IsOkAndHolds(EqualsProto(document1))); + EXPECT_THAT(document_store->Get(/*document_id=*/1), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(document_store->Get(/*document_id=*/2), + IsOkAndHolds(EqualsProto(document3))); +} +#endif // DISABLE_BACKWARDS_COMPAT_TEST + } // namespace } // namespace lib |