aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2021-06-30 20:23:15 +0000
committerTim Barron <tjbarron@google.com>2021-07-02 04:02:00 +0000
commit0dc99ac82a5cb45a6795d535d4df492f7f44b686 (patch)
tree49edf1ba008b63bfa2ed8276d421d5ca6f62ec1b
parent672648070b7d9c7e066d941f52f8a4b682cf963f (diff)
downloadicing-0dc99ac82a5cb45a6795d535d4df492f7f44b686.tar.gz
Merge remote-tracking branch 'goog/androidx-platform-dev'
Per yamasani@'s request, I have broken up ag/15113841 [I1d9d017f6d225c292295b2fbccd42ea731ef40c0] into smaller chunks to simplify review. This change should only include the changes for: - Switch DocumentStore to using PortableFileBackedProtoLog instead of FileBackedProtoLog. I've made one additional change from the state of the relevant code compared to ag/15113841. Per discussion with yamasani@ and adorokhine@, we decided to skip the migration and just accept data loss given the small population of devices that will have existing AppSearch data in system_server. The old migration code still exists, but is behind the ENABLE_V1_MIGRATION preprocessor macro which will not be defined. Bug: 185806837 Test: Presubmit Change-Id: I589fa722bf2695127a127db550113d937d5590f4
-rw-r--r--icing/file/file-backed-proto-log.h49
-rw-r--r--icing/file/file-backed-proto-log_benchmark.cc40
-rw-r--r--icing/file/portable-file-backed-proto-log.h40
-rw-r--r--icing/file/portable-file-backed-proto-log_benchmark.cc40
-rw-r--r--icing/file/portable-file-backed-proto-log_test.cc20
-rw-r--r--icing/icing-search-engine_benchmark.cc114
-rw-r--r--icing/icing-search-engine_test.cc285
-rw-r--r--icing/portable/platform.h16
-rw-r--r--icing/store/document-log-creator.cc206
-rw-r--r--icing/store/document-log-creator.h77
-rw-r--r--icing/store/document-store.cc49
-rw-r--r--icing/store/document-store.h29
-rw-r--r--icing/store/document-store_benchmark.cc87
-rw-r--r--icing/store/document-store_test.cc241
14 files changed, 1148 insertions, 145 deletions
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 9ccd81b..b2b37e8 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -80,23 +80,6 @@
namespace icing {
namespace lib {
-namespace {
-
-bool IsEmptyBuffer(const char* buffer, int size) {
- return std::all_of(buffer, buffer + size,
- [](const char byte) { return byte == 0; });
-}
-
-// Helper function to get stored proto size from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
-
-// Helper function to get stored proto magic from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
-
-} // namespace
-
template <typename ProtoT>
class FileBackedProtoLog {
public:
@@ -402,6 +385,28 @@ class FileBackedProtoLog {
const Filesystem* filesystem, const std::string& file_path,
Crc32 initial_crc, int64_t start, int64_t end);
+ static bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+ }
+
+ // Helper function to get stored proto size from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+ // Helper function to get stored proto magic from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
+ // Reads out the metadata of a proto located at file_offset from the file.
+ //
+ // Returns:
+ // Proto's metadata on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
+ // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
+ static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
+ MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
+
// Magic number added in front of every proto. Used when reading out protos
// as a first check for corruption in each entry in the file. Even if there is
// a corruption, the best we can do is roll back to our last recovery point
@@ -429,15 +434,6 @@ class FileBackedProtoLog {
ScopedFd fd_;
const Filesystem* const filesystem_;
const std::string file_path_;
-
- // Reads out the metadata of a proto located at file_offset from the file.
- //
- // Returns:
- // Proto's metadata on success
- // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
- // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
- static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
- MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
std::unique_ptr<Header> header_;
};
@@ -573,6 +569,7 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
ComputeChecksum(filesystem, file_path, Crc32(),
sizeof(Header), file_size));
+
// Double check that the log checksum is the same as the one that was
// persisted last time. If not, we start recovery logic.
if (header->log_checksum != calculated_log_checksum.Get()) {
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
index 766cc64..c09fd5a 100644
--- a/icing/file/file-backed-proto-log_benchmark.cc
+++ b/icing/file/file-backed-proto-log_benchmark.cc
@@ -164,6 +164,46 @@ BENCHMARK(BM_Read)
// 16MiB, and we need some extra space for the
// rest of the document properties
+static void BM_Erase(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s", GetTestTempDir().c_str(), "/proto.log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log =
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+ proto_log->WriteProto(document));
+ state.ResumeTiming();
+
+ testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
+
static void BM_ComputeChecksum(benchmark::State& state) {
const Filesystem filesystem;
const std::string file_path = GetTestTempDir() + "/proto.log";
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index 000ab3d..825b763 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -83,28 +83,6 @@
namespace icing {
namespace lib {
-namespace {
-
-// Number of bytes we reserve for the heading at the beginning of the proto log.
-// We reserve this so the header can grow without running into the contents of
-// the proto log, triggering an unnecessary migration of the data.
-constexpr int kHeaderReservedBytes = 256;
-
-bool IsEmptyBuffer(const char* buffer, int size) {
- return std::all_of(buffer, buffer + size,
- [](const char byte) { return byte == 0; });
-}
-
-// Helper function to get stored proto size from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
-
-// Helper function to get stored proto magic from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
-
-} // namespace
-
template <typename ProtoT>
class PortableFileBackedProtoLog {
public:
@@ -135,6 +113,11 @@ class PortableFileBackedProtoLog {
: compress(compress_in), max_proto_size(max_proto_size_in) {}
};
+ // Number of bytes we reserve for the heading at the beginning of the proto
+ // log. We reserve this so the header can grow without running into the
+ // contents of the proto log, triggering an unnecessary migration of the data.
+ static constexpr int kHeaderReservedBytes = 256;
+
// Header stored at the beginning of the file before the rest of the log
// contents. Stores metadata on the log.
class Header {
@@ -541,6 +524,19 @@ class PortableFileBackedProtoLog {
static libtextclassifier3::Status WriteProtoMetadata(
const Filesystem* filesystem, int fd, int32_t host_order_metadata);
+ static bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+ }
+
+ // Helper function to get stored proto size from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+ // Helper function to get stored proto magic from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
// Magic number added in front of every proto. Used when reading out protos
// as a first check for corruption in each entry in the file. Even if there is
// a corruption, the best we can do is roll back to our last recovery point
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
index b1dfe12..04ccab0 100644
--- a/icing/file/portable-file-backed-proto-log_benchmark.cc
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -163,6 +163,46 @@ BENCHMARK(BM_Read)
->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is
// 16MiB, and we need some extra space for the
// rest of the document properties
+ //
+static void BM_Erase(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s", GetTestTempDir().c_str(), "/proto.log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+ proto_log->WriteProto(document));
+ state.ResumeTiming();
+
+ testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
static void BM_ComputeChecksum(benchmark::State& state) {
const Filesystem filesystem;
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index 69b8a1a..b5fee4b 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -113,7 +113,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
// With no protos written yet, the log should be minimum the size of the
// reserved header space.
- ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), kHeaderReservedBytes);
+ ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
+ PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes);
}
TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
@@ -417,8 +418,9 @@ TEST_F(PortableFileBackedProtoLogTest,
// We still have the corrupted content in our file, we didn't throw
// everything out.
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Gt(kHeaderReservedBytes));
+ EXPECT_THAT(
+ filesystem_.GetFileSize(file_path_.c_str()),
+ Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
}
}
@@ -456,9 +458,10 @@ TEST_F(PortableFileBackedProtoLogTest,
DocumentProto document =
DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
std::string serialized_document = document.SerializeAsString();
- ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), kHeaderReservedBytes,
- serialized_document.data(),
- serialized_document.size()));
+ ASSERT_TRUE(filesystem_.PWrite(
+ file_path_.c_str(),
+ PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes,
+ serialized_document.data(), serialized_document.size()));
Header header = ReadHeader(filesystem_, file_path_);
@@ -484,8 +487,9 @@ TEST_F(PortableFileBackedProtoLogTest,
EXPECT_TRUE(create_result.recalculated_checksum);
// We lost everything, file size is back down to the header.
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kHeaderReservedBytes));
+ EXPECT_THAT(
+ filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
// At least the log is no longer dirty.
Header header = ReadHeader(filesystem_, file_path_);
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index b437724..316b74f 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -577,6 +577,120 @@ void BM_RepeatedPut(benchmark::State& state) {
// cap the limit to 1 << 18.
BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
+// This is different from BM_RepeatedPut since we're just trying to benchmark
+// one Put call, not thousands of them at once.
+void BM_Put(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(icing->Put(document));
+ }
+}
+BENCHMARK(BM_Put);
+
+void BM_Get(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ for (auto s : state) {
+ benchmark::DoNotOptimize(
+ icing->Get("namespace", "uri", GetResultSpecProto::default_instance()));
+ }
+}
+BENCHMARK(BM_Get);
+
+void BM_Delete(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ for (auto s : state) {
+ state.PauseTiming();
+ icing->Put(document);
+ state.ResumeTiming();
+
+ benchmark::DoNotOptimize(icing->Delete("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_Delete);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index c1de0f0..752e0e2 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -42,6 +42,7 @@
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
+#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
@@ -100,9 +101,26 @@ constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE =
StringIndexingConfig_TokenizerType_Code_NONE;
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN;
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
@@ -990,7 +1008,8 @@ TEST_F(IcingSearchEngineTest, SetSchema) {
HasSubstr("'Photo' not found"));
}
-TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
+TEST_F(IcingSearchEngineTest,
+ SetSchemaTriggersIndexRestorationAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -2074,7 +2093,8 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
// Deletes document1
ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
const std::string document_log_path =
- icing_options.base_dir() + "/document_dir/document_log";
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
int64_t document_log_size_before =
filesystem()->GetFileSize(document_log_path.c_str());
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
@@ -3438,8 +3458,8 @@ TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
EqualsProto(expected_get_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
const std::string corrupt_data = "1234";
EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
corrupt_data.data(), corrupt_data.size()));
@@ -5616,15 +5636,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) {
// 2. Delete the last document from the document log
{
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem(), document_log_file.c_str(),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
std::move(create_result.proto_log);
document = DocumentBuilder(document).SetUri("fake_type/0").Build();
@@ -5689,15 +5710,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) {
// 2. Delete the last two documents from the document log.
{
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem(), document_log_file.c_str(),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
std::move(create_result.proto_log);
document = DocumentBuilder(document).SetUri("fake_type/0").Build();
@@ -5994,8 +6016,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) {
// Append a non-checksummed document. This will mess up the checksum of the
// proto log, forcing it to rewind and later return a DATA_LOSS error.
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
filesystem()->PWrite(document_log_file.c_str(), file_size,
@@ -6045,31 +6067,47 @@ TEST_F(IcingSearchEngineTest,
.SetSchema("Message")
.AddStringProperty("body", "message body")
.Build();
+
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ int64_t corruptible_offset;
+
{
// Initialize and put a document.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // There's some space at the beginning of the file (e.g. header, kmagic,
+ // etc) that is necessary to initialize the FileBackedProtoLog. We can't
+ // corrupt that region, so we need to figure out the offset at which
+ // documents will be written to - which is the file size after
+ // initialization.
+ corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
+
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
}
{
- // Modify the document log checksum to trigger a complete document log
- // rewind.
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
-
- FileBackedProtoLog<DocumentWrapper>::Header document_log_header;
- filesystem()->PRead(document_log_file.c_str(), &document_log_header,
- sizeof(FileBackedProtoLog<DocumentWrapper>::Header),
- /*offset=*/0);
- // Set a garbage checksum.
- document_log_header.log_checksum = 10;
- document_log_header.header_checksum =
- document_log_header.CalculateHeaderChecksum();
- filesystem()->PWrite(document_log_file.c_str(), /*offset=*/0,
- &document_log_header,
- sizeof(FileBackedProtoLog<DocumentWrapper>::Header));
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem()->PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(*filesystem(), document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteDocumentLogHeader(*filesystem(), document_log_file, header);
}
{
@@ -7182,6 +7220,177 @@ TEST_F(IcingSearchEngineTest, CJKSnippetTest) {
EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
}
+// We skip this test case when we're running in a jni_test since the data files
+// will be stored in the android-instrumented storage location, rather than the
+// normal cc_library runfiles directory. To get that storage location, it's
+// recommended to use the TestStorage APIs which handles different API
+// levels/absolute vs relative/etc differences. Since that's only accessible on
+// the java-side, and I haven't figured out a way to pass that directory path to
+// this native side yet, we're just going to disable this. The functionality is
+// already well-tested across 4 different emulated OS's so we're not losing much
+// test coverage here.
+#ifndef ICING_JNI_TEST
+// Disable backwards compat test. This test is enabled in google3, but disabled
+// in jetpack/framework because we didn't want to keep the binary testdata files
+// in our repo.
+#define DISABLE_BACKWARDS_COMPAT_TEST
+#ifndef DISABLE_BACKWARDS_COMPAT_TEST
+TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string dir_without_portable_log;
+ if (IsAndroidX86()) {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_x86");
+ } else if (IsAndroidArm()) {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_arm");
+ } else if (IsIosPlatform()) {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_ios");
+ } else {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_linux");
+ }
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(dir_without_portable_log.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Set up schema, this is the one used to validate documents in the testdata
+ // files. Do not change unless you're also updating the testdata files.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Make sure our schema is still the same as we expect. If not, there's
+ // definitely no way we're getting the documents back that we expect.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ // Document 1 and 3 were put normally, and document 2 was deleted in our
+ // testdata files.
+ EXPECT_THAT(icing
+ .Get(document1.namespace_(), document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document1));
+ EXPECT_THAT(icing
+ .Get(document2.namespace_(), document2.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing
+ .Get(document3.namespace_(), document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document3));
+
+ // Searching for "foo" should get us document1.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+
+ SearchResultProto expected_document1;
+ expected_document1.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1.mutable_results()->Add()->mutable_document() = document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document1));
+
+ // Searching for "baz" would've gotten us document2, except it got deleted.
+ // Make sure that it's cleared from our index too.
+ search_spec.set_query("baz");
+
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+
+ // Searching for "phoo" should get us document3.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document3;
+ expected_document3.mutable_status()->set_code(StatusProto::OK);
+ *expected_document3.mutable_results()->Add()->mutable_document() = document3;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document3));
+}
+#endif // DISABLE_BACKWARDS_COMPAT_TEST
+#endif // !ICING_JNI_TEST
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/portable/platform.h b/icing/portable/platform.h
index 8712835..150eede 100644
--- a/icing/portable/platform.h
+++ b/icing/portable/platform.h
@@ -34,11 +34,19 @@ inline bool IsReverseJniTokenization() {
return false;
}
-// Whether the running test is an Android test.
-inline bool IsAndroidPlatform() {
-#if defined(__ANDROID__)
+// Whether we're running on android_x86
+inline bool IsAndroidX86() {
+#if defined(__ANDROID__) && defined(__i386__)
return true;
-#endif // defined(__ANDROID__)
+#endif // defined(__ANDROID__) && defined(__i386__)
+ return false;
+}
+
+// Whether we're running on android_armeabi-v7a
+inline bool IsAndroidArm() {
+#if defined(__ANDROID__) && defined(__arm__)
+ return true;
+#endif // defined(__ANDROID__) && defined(__arm__)
return false;
}
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
new file mode 100644
index 0000000..a035f93
--- /dev/null
+++ b/icing/store/document-log-creator.cc
@@ -0,0 +1,206 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-log-creator.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/logging.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Filename prefix for the document log; versioned suffixes are added below.
+constexpr char kDocumentLogFilename[] = "document_log";
+
+std::string DocumentLogFilenameV0() {
+ // Originally only had this one version, no suffix.
+ return kDocumentLogFilename;
+}
+
+std::string DocumentLogFilenameV1() {
+ return absl_ports::StrCat(kDocumentLogFilename, "_v1");
+}
+
+std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
+}
+
+std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
+}
+
+} // namespace
+
+std::string DocumentLogCreator::GetDocumentLogFilename() {
+ // This should always return the latest version of the document log in use.
+ // The current latest version is V1.
+ return DocumentLogFilenameV1();
+}
+
+libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
+DocumentLogCreator::Create(const Filesystem* filesystem,
+ const std::string& base_dir) {
+ bool v0_exists =
+ filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
+ bool regen_derived_files = false;
+
+#ifdef ENABLE_V1_MIGRATION
+ bool v1_exists =
+ filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
+
+ if (v0_exists && !v1_exists) {
+ ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));
+
+ // Need to regenerate derived files since documents may be written to a
+ // different file offset in the log.
+ regen_derived_files = true;
+ } else if (!v1_exists) {
+ // First time initializing a v1 log. There are no existing derived files at
+ // this point, so we should generate some. "regenerate" here also means
+ // "generate for the first time", i.e. we shouldn't expect there to be any
+ // existing derived files.
+ regen_derived_files = true;
+ }
+#else // !ENABLE_V1_MIGRATION
+ if (v0_exists) {
+ // If migration from v0 to v1 is not enabled, then simply delete the v0 file
+ // and treat this as if it's our first time initializing a v1 log.
+ regen_derived_files = true;
+ filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str());
+ }
+#endif  // ENABLE_V1_MIGRATION
+
+ ICING_ASSIGN_OR_RETURN(
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
+ log_create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV1(base_dir),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+
+ CreateResult create_result = {std::move(log_create_result),
+ regen_derived_files};
+ return create_result;
+}
+
+libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
+    const Filesystem* filesystem, const std::string& base_dir) {
+  ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
+
+  // Our v0 proto log was non-portable, create it so we can read protos out from
+  // it.
+  auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
+      filesystem, MakeDocumentLogFilenameV0(base_dir),
+      FileBackedProtoLog<DocumentWrapper>::Options(
+          /*compress_in=*/true));
+  if (!v0_create_result_or.ok()) {
+    // Annotate the failure with migration context and propagate it.
+    return absl_ports::Annotate(
+        v0_create_result_or.status(),
+        "Failed to initialize v0 document log while migrating.");
+  }
+  FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
+      std::move(v0_create_result_or).ValueOrDie();
+  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
+      std::move(v0_create_result.proto_log);
+
+  // Create a v1 portable proto log that we will write our protos to.
+  auto v1_create_result_or =
+      PortableFileBackedProtoLog<DocumentWrapper>::Create(
+          filesystem, MakeDocumentLogFilenameV1(base_dir),
+          PortableFileBackedProtoLog<DocumentWrapper>::Options(
+              /*compress_in=*/true));
+  if (!v1_create_result_or.ok()) {
+    return absl_ports::Annotate(
+        v1_create_result_or.status(),
+        "Failed to initialize v1 document log while migrating.");
+  }
+  PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
+      std::move(v1_create_result_or).ValueOrDie();
+  std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
+      std::move(v1_create_result.proto_log);
+
+  // Dummy empty document to be used when copying over deleted documents.
+  DocumentProto empty_document;
+
+  // Start reading out from the old log and putting them in the new log.
+  auto iterator = v0_proto_log->GetIterator();
+  auto iterator_status = iterator.Advance();
+  while (iterator_status.ok()) {
+    libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
+        v0_proto_log->ReadProto(iterator.GetOffset());
+
+    bool deleted_document = false;
+    DocumentWrapper document_wrapper;
+    if (absl_ports::IsNotFound(document_wrapper_or.status())) {
+      // Proto was erased, we can skip copying this into our new log.
+      *document_wrapper.mutable_document() = empty_document;
+      deleted_document = true;
+    } else if (!document_wrapper_or.ok()) {
+      // Some real error, pass up
+      return document_wrapper_or.status();
+    } else {
+      document_wrapper = std::move(document_wrapper_or).ValueOrDie();
+    }
+
+    auto offset_or = v1_proto_log->WriteProto(document_wrapper);
+    if (!offset_or.ok()) {
+      return absl_ports::Annotate(
+          offset_or.status(),
+          "Failed to write proto to v1 document log while migrating.");
+    }
+
+    // If the original document was deleted, erase the proto we just wrote.
+    // We do this to maintain the document_ids, i.e. we still want document_id 2
+    // to point to a deleted document even though we may not have the document
+    // contents anymore. DocumentStore guarantees that the document_ids don't
+    // change unless an Optimize is triggered.
+    if (deleted_document) {
+      int64_t offset = offset_or.ValueOrDie();
+      auto erased_status = v1_proto_log->EraseProto(offset);
+      if (!erased_status.ok()) {
+        return absl_ports::Annotate(
+            erased_status,
+            "Failed to erase proto in v1 document log while migrating.");
+      }
+    }
+
+    iterator_status = iterator.Advance();
+  }
+
+  // Close out our file log pointers.
+  v0_proto_log.reset();
+  v1_proto_log.reset();
+
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h
new file mode 100644
index 0000000..51cf497
--- /dev/null
+++ b/icing/store/document-log-creator.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+#define ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Handles creation of the document log and any underlying migrations that may
+// be necessary.
+class DocumentLogCreator {
+ public:
+ struct CreateResult {
+ // The create result passed up from the PortableFileBackedProtoLog::Create.
+ // Contains the document log.
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result;
+
+ // Whether the caller needs to also regenerate/generate any derived files
+ // based off of the initialized document log.
+ bool regen_derived_files;
+ };
+
+ // Creates the document log in the base_dir. Will create one if it doesn't
+ // already exist.
+ //
+ // This also handles any potential migrations from old document log versions.
+ // At the end of this call, the most up-to-date log will be returned and will
+ // be usable.
+ //
+ // Returns:
+ // CreateResult on success.
+ // INTERNAL on any I/O error.
+ static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create(
+ const Filesystem* filesystem, const std::string& base_dir);
+
+ // Returns the filename of the document log, without any directory prefixes.
+ // Used mainly for testing purposes.
+ static std::string GetDocumentLogFilename();
+
+ private:
+ // Handles migrating a v0 document log (not portable) to a v1 document log
+ // (portable). This will initialize the log in the beginning, and close it
+ // when migration is done. Callers will need to reinitialize the log on their
+ // own.
+ //
+ // Returns:
+ // OK on success.
+ // INVALID_ARGUMENT if some invalid option was passed to the document log.
+ // INTERNAL on I/O error.
+ static libtextclassifier3::Status MigrateFromV0ToV1(
+ const Filesystem* filesystem, const std::string& base_dir);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_LOG_CREATOR_H_
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 4e63b90..907bace 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -33,6 +33,7 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
@@ -44,6 +45,7 @@
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
#include "icing/store/usage-store.h"
@@ -62,7 +64,6 @@ namespace {
// Used in DocumentId mapper to mark a document as deleted
constexpr int64_t kDocDeletedFlag = -1;
-constexpr char kDocumentLogFilename[] = "document_log";
constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
constexpr char kScoreCacheFilename[] = "score_cache";
@@ -93,10 +94,6 @@ std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
}
-std::string MakeDocumentLogFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename);
-}
-
std::string MakeScoreCacheFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
}
@@ -224,30 +221,36 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
- auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem_, MakeDocumentLogFilename(base_dir_),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+ auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_);
+
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
if (!create_result_or.ok()) {
ICING_LOG(ERROR) << create_result_or.status().error_message()
- << "\nFailed to initialize DocumentLog";
+ << "\nFailed to initialize DocumentLog.";
return create_result_or.status();
}
- FileBackedProtoLog<DocumentWrapper>::CreateResult create_result =
+ DocumentLogCreator::CreateResult create_result =
std::move(create_result_or).ValueOrDie();
- document_log_ = std::move(create_result.proto_log);
- if (force_recovery_and_revalidate_documents ||
- create_result.has_data_loss()) {
- if (create_result.has_data_loss() && initialize_stats != nullptr) {
+ document_log_ = std::move(create_result.log_create_result.proto_log);
+
+ if (create_result.regen_derived_files ||
+ force_recovery_and_revalidate_documents ||
+ create_result.log_create_result.has_data_loss()) {
+ // We can't rely on any existing derived files. Recreate them from scratch.
+ // Currently happens if:
+ // 1) This is a new log and we don't have derived files yet
+ // 2) Client wanted us to force a regeneration.
+ // 3) Log has some data loss, can't rely on existing derived data.
+ if (create_result.log_create_result.has_data_loss() &&
+ initialize_stats != nullptr) {
ICING_LOG(WARNING)
<< "Data loss in document log, regenerating derived files.";
initialize_stats->set_document_store_recovery_cause(
InitializeStatsProto::DATA_LOSS);
- if (create_result.data_loss == DataLoss::PARTIAL) {
+ if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) {
// Ground truth is partially lost.
initialize_stats->set_document_store_data_status(
InitializeStatsProto::PARTIAL_LOSS);
@@ -257,10 +260,16 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
InitializeStatsProto::COMPLETE_LOSS);
}
}
+
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status =
RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
- if (initialize_stats != nullptr) {
+ if (initialize_stats != nullptr &&
+ (force_recovery_and_revalidate_documents ||
+ create_result.log_create_result.has_data_loss())) {
+ // Only consider it a recovery if the client forced a recovery or there
+ // was data loss. Otherwise, this could just be the first time we're
+ // initializing and generating derived files.
initialize_stats->set_document_store_recovery_latency_ms(
document_recovery_timer->GetElapsedMilliseconds());
}
@@ -270,7 +279,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
return status;
}
} else {
- if (!InitializeDerivedFiles().ok()) {
+ if (!InitializeExistingDerivedFiles().ok()) {
ICING_VLOG(1)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
@@ -296,10 +305,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
initialize_stats->set_num_documents(document_id_mapper_->num_elements());
}
- return create_result.data_loss;
+ return create_result.log_create_result.data_loss;
}
-libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
+libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
if (!HeaderExists()) {
// Without a header, we don't know if things are consistent between each
// other so the caller should just regenerate everything from ground
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index b0cd1ce..79d99d4 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -26,6 +26,7 @@
#include "icing/file/file-backed-proto-log.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
@@ -438,7 +439,7 @@ class DocumentStore {
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_;
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
// Key (namespace + uri) to DocumentId mapping
std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
@@ -495,11 +496,35 @@ class DocumentStore {
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats);
+ // Initializes a new DocumentStore and sets up any underlying files.
+ //
+ // Returns:
+ // Data loss status on success, effectively always DataLoss::NONE
+ // INTERNAL on I/O error
+ libtextclassifier3::StatusOr<DataLoss> InitializeNewStore(
+ InitializeStatsProto* initialize_stats);
+
+ // Initializes a DocumentStore over an existing directory of files.
+ //
+ // stats will be set if non-null
+ //
+ // Returns:
+ // Data loss status on success
+ // INTERNAL on I/O error
+ libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore(
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats);
+
+ libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1(
+ InitializeStatsProto* initialize_stats);
+
// Creates sub-components and verifies the integrity of each sub-component.
+  // This assumes that the underlying files already exist, and will return
+ // an error if it doesn't find what it's expecting.
//
// Returns an error if subcomponents failed to initialize successfully.
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status InitializeDerivedFiles();
+ libtextclassifier3::Status InitializeExistingDerivedFiles();
// Re-generates all files derived from the ground truth: the document log.
//
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index f68e115..ce608fc 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -168,6 +168,93 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
}
BENCHMARK(BM_DoesDocumentExistBenchmark);
+void BM_Put(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+
+ for (auto s : state) {
+ // It's ok that this is the same document over and over. We'll create a new
+ // document_id for it and still insert the proto into the underlying log.
+ benchmark::DoNotOptimize(document_store->Put(document));
+ }
+}
+BENCHMARK(BM_Put);
+
+void BM_GetSameDocument(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", "uri")));
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(document_store->Get("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_GetSameDocument);
+
+void BM_Delete(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+
+ for (auto s : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK(document_store->Put(document));
+ state.ResumeTiming();
+
+ benchmark::DoNotOptimize(document_store->Delete("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_Delete);
+
} // namespace
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index ad3b7c4..3ed4c4e 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -15,6 +15,7 @@
#include "icing/store/document-store.h"
#include <cstdint>
+#include <filesystem>
#include <limits>
#include <memory>
#include <string>
@@ -40,6 +41,7 @@
#include "icing/store/corpus-id.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -105,6 +107,22 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri,
return usage_report;
}
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
class DocumentStoreTest : public ::testing::Test {
protected:
DocumentStoreTest()
@@ -452,14 +470,18 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(
document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
@@ -538,13 +560,17 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
@@ -607,7 +633,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
CorruptDocStoreHeaderChecksumFile();
@@ -621,7 +649,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
@@ -730,13 +760,17 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
@@ -809,7 +843,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
CorruptDocStoreHeaderChecksumFile();
@@ -823,7 +859,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
@@ -901,7 +939,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
CorruptDocStoreHeaderChecksumFile();
@@ -923,7 +963,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
@@ -968,7 +1010,9 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK(doc_store->Put(document2));
ICING_ASSERT_OK(doc_store->Put(document3));
- std::string original_document_log = document_store_dir_ + "/document_log";
+ std::string original_document_log = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
int64_t original_size =
filesystem_.GetFileSize(original_document_log.c_str());
@@ -979,7 +1023,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
HasSubstr("directory is the same")));
std::string optimized_dir = document_store_dir_ + "_optimize";
- std::string optimized_document_log = optimized_dir + "/document_log";
+ std::string optimized_document_log =
+ optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
// Validates that the optimized document log has the same size if nothing is
// deleted
@@ -1067,8 +1112,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
@@ -2919,8 +2964,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
@@ -3043,7 +3088,9 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
const std::string serialized_document = document.SerializeAsString();
const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str();
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
@@ -3060,8 +3107,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
int64_t corruptible_offset;
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3088,8 +3135,30 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// "Corrupt" the persisted content written in the log. We can't recover if
// the persisted data was corrupted.
std::string corruption = "abc";
- filesystem_.PWrite(document_log_file.c_str(), /*offset=*/corruptible_offset,
- corruption.data(), corruption.size());
+ filesystem_.PWrite(document_log_file.c_str(),
+ /*offset=*/corruptible_offset, corruption.data(),
+ corruption.size());
+
+ {
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem_.PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(filesystem_, document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteDocumentLogHeader(filesystem_, document_log_file, header);
+ }
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3106,8 +3175,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// the document store header.
//
// This causes a problem now because this cl changes behavior to not consider an
-// InitializeDerivedFiles failure to be a recovery if there is nothing to
-// recover because the doocument store is empty.
+// InitializeExistingDerivedFiles failure to be a recovery if there is nothing
+// to recover because the document store is empty.
#define DISABLE_BACKWARDS_COMPAT_TEST
#ifndef DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
@@ -3667,6 +3736,128 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
}
}
+#ifndef DISABLE_BACKWARDS_COMPAT_TEST
+TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
+ // Set up schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ std::string schema_store_dir = schema_store_dir_ + "_migrate";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+ // Create dst directory that we'll initialize the DocumentStore over.
+ std::string document_store_dir = document_store_dir_ + "_migrate";
+ ASSERT_THAT(
+ filesystem_.DeleteDirectoryRecursively(document_store_dir.c_str()), true);
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()), true);
+
+ // Copy the testdata files into our DocumentStore directory
+ std::string document_store_without_portable_log;
+ if (IsAndroidX86()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_x86/document_dir");
+ } else if (IsAndroidArm()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_arm/document_dir");
+ } else if (IsIosPlatform()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_ios/document_dir");
+ } else {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_linux/document_dir");
+ }
+
+ ASSERT_TRUE(filesystem_.CopyDirectory(
+ document_store_without_portable_log.c_str(), document_store_dir.c_str(),
+ /*recursive=*/true));
+
+ // Initialize the DocumentStore over our copied files.
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_,
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ &initialize_stats));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ // Check that we didn't lose anything. A migration also doesn't technically
+ // count as a recovery.
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause());
+
+ // Document 1 and 3 were put normally, and document 2 was deleted in our
+ // testdata files.
+ //
+ // Check by namespace, uri
+ EXPECT_THAT(document_store->Get(document1.namespace_(), document1.uri()),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(document_store->Get(document2.namespace_(), document2.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(document3.namespace_(), document3.uri()),
+ IsOkAndHolds(EqualsProto(document3)));
+
+ // Check by document_id
+ EXPECT_THAT(document_store->Get(/*document_id=*/0),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(document_store->Get(/*document_id=*/1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(/*document_id=*/2),
+ IsOkAndHolds(EqualsProto(document3)));
+}
+#endif // DISABLE_BACKWARDS_COMPAT_TEST
+
} // namespace
} // namespace lib